Actual source code: ptoar.c

slepc-3.7.0 2016-05-16
Report Typos and Errors
  1: /*

  3:    SLEPc polynomial eigensolver: "toar"

  5:    Method: TOAR

  7:    Algorithm:

  9:        Two-Level Orthogonal Arnoldi.

 11:    References:

 13:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 14:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 16:        [2] C. Campos and J.E. Roman, "Parallel Krylov solvers for the
 17:            polynomial eigenvalue problem in SLEPc", SIAM J. Sci. Comput.
 18:            to appear, 2016.

 20:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 21:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 22:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

 24:    This file is part of SLEPc.

 26:    SLEPc is free software: you can redistribute it and/or modify it under  the
 27:    terms of version 3 of the GNU Lesser General Public License as published by
 28:    the Free Software Foundation.

 30:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 31:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 32:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 33:    more details.

 35:    You  should have received a copy of the GNU Lesser General  Public  License
 36:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 37:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 38: */

 40: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 41:  #include ../src/pep/impls/krylov/pepkrylov.h
 42: #include <slepcblaslapack.h>

 44: static PetscBool  cited = PETSC_FALSE;
 45: static const char citation[] =
 46:   "@Article{slepc-pep,\n"
 47:   "   author = \"C. Campos and J. E. Roman\",\n"
 48:   "   title = \"Parallel {Krylov} solvers for the polynomial eigenvalue problem in {SLEPc}\",\n"
 49:   "   journal = \"{SIAM} J. Sci. Comput.\",\n"
 50:   "   volume = \"to appear\",\n"
 51:   "   number = \"\",\n"
 52:   "   pages = \"\",\n"
 53:   "   year = \"2016,\"\n"
 54:   "   doi = \"http://dx.doi.org/10.xxxx/yyyy\"\n"
 55:   "}\n";

 59: /*
 60:   Norm of [sp;sq]
 61: */
 62: static PetscErrorCode PEPTOARSNorm2(PetscInt n,PetscScalar *S,PetscReal *norm)
 63: {
 65:   PetscBLASInt   n_,one=1;

 68:   PetscBLASIntCast(n,&n_);
 69:   *norm = BLASnrm2_(&n_,S,&one);
 70:   return(0);
 71: }

 75: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 76: {
 78:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 79:   PetscBool      sinv,flg,lindep;
 80:   PetscInt       i,lds,deg=pep->nmat-1,j;
 81:   PetscReal      norm;

 84:   pep->lineariz = PETSC_TRUE;
 85:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 86:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
 87:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv);
 88:   if (!pep->which) {
 89:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 90:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 91:     else pep->which = PEP_LARGEST_MAGNITUDE;
 92:   }
 93:   if (pep->problem_type!=PEP_GENERAL) {
 94:     PetscInfo(pep,"Problem type ignored, performing a non-symmetric linearization\n");
 95:   }

 97:   if (!ctx->keep) ctx->keep = 0.5;

 99:   PEPAllocateSolution(pep,pep->nmat-1);
100:   PEPSetWorkVecs(pep,3);
101:   DSSetType(pep->ds,DSNHEP);
102:   DSSetExtraRow(pep->ds,PETSC_TRUE);
103:   DSAllocate(pep->ds,pep->ncv+1);

105:   PEPBasisCoefficients(pep,pep->pbc);
106:   STGetTransform(pep->st,&flg);
107:   if (!flg) {
108:     PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
109:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
110:     if (sinv) {
111:       PEPEvaluateBasis(pep,pep->target,0,pep->solvematcoeffs,NULL);
112:     } else {
113:       for (i=0;i<pep->nmat-1;i++) pep->solvematcoeffs[i] = 0.0;
114:       pep->solvematcoeffs[pep->nmat-1] = 1.0;
115:     }
116:   }
117:   ctx->ld = pep->ncv+(pep->nmat-1);   /* number of rows of each fragment of S */
118:   lds = (pep->nmat-1)*ctx->ld;
119:   PetscCalloc1(lds*ctx->ld,&ctx->S);

121:   /* process starting vector */
122:   ctx->nq = 0;
123:   for (i=0;i<deg;i++) {
124:     if (pep->nini>-deg) {
125:       BVSetRandomColumn(pep->V,ctx->nq);
126:     } else {
127:       BVInsertVec(pep->V,ctx->nq,pep->IS[i]);
128:     }
129:     BVOrthogonalizeColumn(pep->V,ctx->nq,ctx->S+i*ctx->ld,&norm,&lindep);
130:     if (!lindep) {
131:       BVScaleColumn(pep->V,ctx->nq,1.0/norm);
132:       ctx->S[ctx->nq+i*ctx->ld] = norm;
133:       ctx->nq++;
134:     }
135:   }
136:   if (ctx->nq==0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
137:   PEPTOARSNorm2(lds,ctx->S,&norm);
138:   for (j=0;j<deg;j++) {
139:     for (i=0;i<=j;i++) ctx->S[i+j*ctx->ld] /= norm;
140:   }
141:   if (pep->nini<0) {
142:     SlepcBasisDestroy_Private(&pep->nini,&pep->IS);
143:   }
144:   return(0);
145: }

149: /*
150:  Computes GS orthogonalization   [z;x] - [Sp;Sq]*y,
151:  where y = ([Sp;Sq]'*[z;x]).
152:    k: Column from S to be orthogonalized against previous columns.
153:    Sq = Sp+ld
154:    dim(work)>=k
155: */
156: static PetscErrorCode PEPTOAROrth2(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt k,PetscScalar *y,PetscReal *norm,PetscBool *lindep,PetscScalar *work)
157: {
159:   PetscBLASInt   n_,lds_,k_,one=1;
160:   PetscScalar    sonem=-1.0,sone=1.0,szero=0.0,*x0,*x,*c;
161:   PetscInt       i,lds=deg*ld,n;
162:   PetscReal      eta,onorm;

165:   BVGetOrthogonalization(pep->V,NULL,NULL,&eta,NULL);
166:   n = k+deg-1;
167:   PetscBLASIntCast(n,&n_);
168:   PetscBLASIntCast(deg*ld,&lds_);
169:   PetscBLASIntCast(k,&k_); /* number of vectors to orthogonalize against them */
170:   c = work;
171:   x0 = S+k*lds;
172:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,y,&one));
173:   for (i=1;i<deg;i++) {
174:     x = S+i*ld+k*lds;
175:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,y,&one));
176:   }
177:   for (i=0;i<deg;i++) {
178:     x= S+i*ld+k*lds;
179:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,y,&one,&sone,x,&one));
180:   }
181:   PEPTOARSNorm2(lds,S+k*lds,&onorm);
182:   /* twice */
183:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,c,&one));
184:   for (i=1;i<deg;i++) {
185:     x = S+i*ld+k*lds;
186:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,c,&one));
187:   }
188:   for (i=0;i<deg;i++) {
189:     x= S+i*ld+k*lds;
190:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,c,&one,&sone,x,&one));
191:   }
192:   for (i=0;i<k;i++) y[i] += c[i];
193:   if (norm) {
194:     PEPTOARSNorm2(lds,S+k*lds,norm);
195:     if (lindep) *lindep = (*norm < eta * onorm)?PETSC_TRUE:PETSC_FALSE;
196:   }
197:   return(0);
198: }

202: /*
203:   Extend the TOAR basis by applying the the matrix operator
204:   over a vector which is decomposed in the TOAR way
205:   Input:
206:     - pbc: array containing the polynomial basis coefficients
207:     - S,V: define the latest Arnoldi vector (nv vectors in V)
208:   Output:
209:     - t: new vector extending the TOAR basis
210:     - r: temporary coefficients to compute the TOAR coefficients
211:          for the new Arnoldi vector
212:   Workspace: t_ (two vectors)
213: */
214: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_)
215: {
217:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
218:   Vec            v=t_[0],ve=t_[1],q=t_[2];
219:   PetscScalar    alpha=1.0,*ss,a;
220:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
221:   PetscBool      flg;

224:   BVSetActiveColumns(pep->V,0,nv);
225:   STGetTransform(pep->st,&flg);
226:   if (sinvert) {
227:     for (j=0;j<nv;j++) {
228:       if (deg>1) r[lr+j] = S[j]/ca[0];
229:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
230:     }
231:     for (k=2;k<deg-1;k++) {
232:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
233:     }
234:     k = deg-1;
235:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
236:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
237:   } else {
238:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
239:   }
240:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
241:   if (pep->Dr) { /* balancing */
242:     VecPointwiseMult(v,v,pep->Dr);
243:   }
244:   STMatMult(pep->st,off,v,q);
245:   VecScale(q,a);
246:   for (j=1+off;j<deg+off-1;j++) {
247:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
248:     if (pep->Dr) {
249:       VecPointwiseMult(v,v,pep->Dr);
250:     }
251:     STMatMult(pep->st,j,v,t);
252:     a *= pep->sfactor;
253:     VecAXPY(q,a,t);
254:   }
255:   if (sinvert) {
256:     BVMultVec(V,1.0,0.0,v,ss);
257:     if (pep->Dr) {
258:       VecPointwiseMult(v,v,pep->Dr);
259:     }
260:     STMatMult(pep->st,deg,v,t);
261:     a *= pep->sfactor;
262:     VecAXPY(q,a,t);
263:   } else {
264:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
265:     if (pep->Dr) {
266:       VecPointwiseMult(ve,ve,pep->Dr);
267:     }
268:     a *= pep->sfactor;
269:     STMatMult(pep->st,deg-1,ve,t);
270:     VecAXPY(q,a,t);
271:     a *= pep->sfactor;
272:   }
273:   if (flg || !sinvert) alpha /= a;
274:   STMatSolve(pep->st,q,t);
275:   VecScale(t,alpha);
276:   if (!sinvert) {
277:     if (cg[deg-1]!=0) { VecAXPY(t,cg[deg-1],v); }
278:     if (cb[deg-1]!=0) { VecAXPY(t,cb[deg-1],ve); }
279:   }
280:   if (pep->Dr) {
281:     VecPointwiseDivide(t,t,pep->Dr);
282:   }
283:   return(0);
284: }

288: /*
289:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
290: */
291: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
292: {
293:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
294:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
295:   PetscScalar t=1.0,tp=0.0,tt;

298:   if (sinvert) {
299:     for (k=1;k<d;k++) {
300:       tt = t;
301:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
302:       tp = tt;
303:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
304:     }
305:   } else {
306:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
307:     for (k=1;k<d-1;k++) {
308:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
309:     }
310:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
311:   }
312:   return(0);
313: }

317: /*
318:   Compute a run of Arnoldi iterations dim(work)=ld
319: */
320: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscInt *nq,PetscScalar *S,PetscInt ld,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscScalar *work,Vec *t_)
321: {
323:   PetscInt       i,j,p,m=*M,nwu=0,deg=pep->nmat-1;
324:   PetscInt       lds=ld*deg,nqt=*nq;
325:   Vec            t;
326:   PetscReal      norm;
327:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
328:   PetscScalar    *x;

331:   STGetTransform(pep->st,&flg);
332:   if (!flg) {
333:     /* spectral transformation handled by the solver */
334:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
335:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"STtype not supported fr TOAR without transforming matrices");
336:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
337:   }
338:   for (j=k;j<m;j++) {
339:     /* apply operator */
340:     BVGetColumn(pep->V,nqt,&t);
341:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_);
342:     BVRestoreColumn(pep->V,nqt,&t);

344:     /* orthogonalize */
345:     if (sinvert) x = S+(j+1)*lds;
346:     else x = S+(deg-1)*ld+(j+1)*lds;
347:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
348:     if (!lindep) {
349:       x[nqt] = norm;
350:       BVScaleColumn(pep->V,nqt,1.0/norm);
351:       nqt++;
352:     }

354:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);
355:     /* level-2 orthogonalization */
356:     PEPTOAROrth2(pep,S,ld,deg,j+1,H+j*ldh,&norm,breakdown,work+nwu);
357:     H[j+1+ldh*j] = norm;
358:     *nq = nqt;
359:     if (*breakdown) {
360:       *M = j+1;
361:       break;
362:     }
363:     for (p=0;p<deg;p++) {
364:       for (i=0;i<=j+deg;i++) {
365:         S[i+p*ld+(j+1)*lds] /= norm;
366:       }
367:     }
368:   }
369:   return(0);
370: }

374: /*
375:   dim(rwork)=6*n; dim(work)=6*ld*lds+2*cs1
376: */
377: static PetscErrorCode PEPTOARTrunc(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt *rs1a,PetscInt cs1,PetscInt lock,PetscInt newc,PetscBool final,PetscScalar *work,PetscReal *rwork)
378: {
379: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
381:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD/GEQRF/ORGQR - Lapack routine is unavailable");
382: #else
384:   PetscInt       nwu=0,nrwu=0,nnc,nrow,lwa;
385:   PetscInt       j,i,k,n,lds=deg*ld,rs1=*rs1a,rk=0,offu;
386:   PetscScalar    *M,*V,*pU,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau;
387:   PetscReal      *sg,tol;
388:   PetscBLASInt   cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
389:   Mat            U;

392:   if (cs1==0) return(0);
393:   lwa = 6*ld*lds+2*cs1;
394:   n = (rs1>deg*cs1)?deg*cs1:rs1;
395:   nnc = cs1-lock-newc;
396:   nrow = rs1-lock;
397:   PetscMalloc4(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pU,deg*rs1,&tau);
398:   offu = lock*(rs1+1);
399:   M = work+nwu;
400:   nwu += rs1*cs1*deg;
401:   sg = rwork+nrwu;
402:   nrwu += n;
403:   PetscMemzero(pU,rs1*n*sizeof(PetscScalar));
404:   V = work+nwu;
405:   nwu += deg*cs1*n;
406:   PetscBLASIntCast(n,&n_);
407:   PetscBLASIntCast(nnc,&nnc_);
408:   PetscBLASIntCast(cs1,&cs1_);
409:   PetscBLASIntCast(rs1,&rs1_);
410:   PetscBLASIntCast(newc,&newc_);
411:   PetscBLASIntCast(newc*deg,&newctdeg);
412:   PetscBLASIntCast(nnc*deg,&nnctdeg);
413:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
414:   PetscBLASIntCast(lwa-nwu,&lw_);
415:   PetscBLASIntCast(nrow,&nrow_);
416:   PetscBLASIntCast(lds,&lds_);
417:   if (newc>0) {
418:   /* truncate columns associated with new converged eigenpairs */
419:     for (j=0;j<deg;j++) {
420:       for (i=lock;i<lock+newc;i++) {
421:         PetscMemcpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
422:       }
423:     }
424: #if !defined (PETSC_USE_COMPLEX)
425:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,&info));
426: #else
427:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
428: #endif
429:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
430:     /* SVD has rank min(newc,nrow) */
431:     rk = PetscMin(newc,nrow);
432:     for (i=0;i<rk;i++) {
433:       t = sg[i];
434:       PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,V+i,&n_));
435:     }
436:     for (i=0;i<deg;i++) {
437:       for (j=lock;j<lock+newc;j++) {
438:         PetscMemcpy(S+j*lds+i*ld+lock,V+(newc*i+j-lock)*n,rk*sizeof(PetscScalar));
439:         PetscMemzero(S+j*lds+i*ld+lock+rk,(ld-lock-rk)*sizeof(PetscScalar));
440:       }
441:     }
442:     /*
443:       update columns associated with non-converged vectors, orthogonalize
444:        against pU so that next M has rank nnc+d-1 insted of nrow+d-1
445:     */
446:     for (i=0;i<deg;i++) {
447:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
448:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
449:       /* repeat orthogonalization step */
450:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS2,&newc_));
451:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
452:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
453:     }
454:   }
455:   /* truncate columns associated with non-converged eigenpairs */
456:   for (j=0;j<deg;j++) {
457:     for (i=lock+newc;i<cs1;i++) {
458:       PetscMemcpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
459:     }
460:   }
461: #if !defined (PETSC_USE_COMPLEX)
462:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,&info));
463: #else
464:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
465: #endif
466:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
467:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
468:   for (i=0;i<PetscMin(n_,nnctdeg);i++) if (sg[i]>tol) rk++;
469:   rk = PetscMin(nnc+deg-1,rk);
470:   /* the SVD has rank (atmost) nnc+deg-1 */
471:   for (i=0;i<rk;i++) {
472:     t = sg[i];
473:     PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,V+i,&n_));
474:   }
475:   /* update S */
476:   PetscMemzero(S+cs1*lds,(ld-cs1)*lds*sizeof(PetscScalar));
477:   k = ld-lock-newc-rk;
478:   for (i=0;i<deg;i++) {
479:     for (j=lock+newc;j<cs1;j++) {
480:       PetscMemcpy(S+j*lds+i*ld+lock+newc,V+(nnc*i+j-lock-newc)*n,rk*sizeof(PetscScalar));
481:       PetscMemzero(S+j*lds+i*ld+lock+newc+rk,k*sizeof(PetscScalar));
482:     }
483:   }
484:   if (newc>0) {
485:     for (i=0;i<deg;i++) {
486:       p = SS+nnc*newc*i;
487:       for (j=lock+newc;j<cs1;j++) {
488:         for (k=0;k<newc;k++) S[j*lds+i*ld+lock+k] = *(p++);
489:       }
490:     }
491:   }

493:   /* orthogonalize pU */
494:   rk = rk+newc;
495:   PetscBLASIntCast(rk,&rk_);
496:   PetscBLASIntCast(cs1-lock,&nnc_);
497:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));
498:   for (i=0;i<deg;i++) {
499:     PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pU+offu,&rs1_,S+lock*lds+lock+i*ld,&lds_));
500:   }
501:   PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&nrow_,&rk_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));

503:   /* update vectors V(:,idx) = V*Q(:,idx) */
504:   rk = rk+lock;
505:   for (i=0;i<lock;i++) pU[(i+1)*rs1] = 1.0;
506:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pU,&U);
507:   BVSetActiveColumns(pep->V,lock,rs1);
508:   BVMultInPlace(pep->V,U,lock,rk);
509:   BVSetActiveColumns(pep->V,0,rk);
510:   MatDestroy(&U);
511:   *rs1a = rk;

513:   /* free work space */
514:   PetscFree4(SS,SS2,pU,tau);
515:   return(0);
516: #endif
517: }

521: /*
522:   S <- S*Q
523:   columns s-s+ncu of S
524:   rows 0-sr of S
525:   size(Q) qr x ncu
526:   dim(work)=sr*ncu
527: */
528: static PetscErrorCode PEPTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work)
529: {
531:   PetscScalar    a=1.0,b=0.0;
532:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
533:   PetscInt       j,lds=deg*ld,i;

536:   PetscBLASIntCast(sr,&sr_);
537:   PetscBLASIntCast(qr,&qr_);
538:   PetscBLASIntCast(ncu,&ncu_);
539:   PetscBLASIntCast(lds,&lds_);
540:   PetscBLASIntCast(ldq,&ldq_);
541:   for (i=0;i<deg;i++) {
542:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+i*ld,&lds_,Q,&ldq_,&b,work,&sr_));
543:     for (j=0;j<ncu;j++) {
544:       PetscMemcpy(S+lds*(s+j)+i*ld,work+j*sr,sr*sizeof(PetscScalar));
545:     }
546:   }
547:   return(0);
548: }

552: /*
553:   Computes T_j = phi_idx(T). In T_j and T_p are phi_{idx-1}(T)
554:    and phi_{idx-2}(T) respectively or null if idx=0,1.
555:    Tp and Tj are input/output arguments
556: */
557: static PetscErrorCode PEPEvaluateBasisM(PEP pep,PetscInt k,PetscScalar *T,PetscInt ldt,PetscInt idx,PetscScalar **Tp,PetscScalar **Tj)
558: {
560:   PetscInt       i;
561:   PetscReal      *ca,*cb,*cg;
562:   PetscScalar    *pt,g,a;
563:   PetscBLASInt   k_,ldt_;

566:   if (idx==0) {
567:     PetscMemzero(*Tj,k*k*sizeof(PetscScalar));
568:     PetscMemzero(*Tp,k*k*sizeof(PetscScalar));
569:     for (i=0;i<k;i++) (*Tj)[i+i*k] = 1.0;
570:   } else {
571:     PetscBLASIntCast(ldt,&ldt_);
572:     PetscBLASIntCast(k,&k_);
573:     ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
574:     for (i=0;i<k;i++) T[i*ldt+i] -= cb[idx-1];
575:     a = 1/ca[idx-1];
576:     g = (idx==1)?0.0:-cg[idx-1]/ca[idx-1];
577:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,*Tj,&k_,&g,*Tp,&k_));
578:     pt = *Tj; *Tj = *Tp; *Tp = pt;
579:     for (i=0;i<k;i++) T[i*ldt+i] += cb[idx-1];
580:   }
581:   return(0);
582: }

586: /* dim(work)=6*sr*k;*/
587: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh,PetscScalar *work)
588: {
589: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(PETSC_MISSING_LAPACK_GETRF)
591:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/GETRI/GETRF - Lapack routine is unavailable");
592: #else
594:   PetscInt       nw,i,j,jj,nwu=0,lds,ldt,d=pep->nmat-1,idxcpy=0;
595:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,sone=1.0,g,a,*pM;
596:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
597:   PetscBool      transf=PETSC_FALSE,flg;
598:   PetscReal      nrm,norm,maxnrm,*rwork;
599:   BV             *R,Y;
600:   Mat            M,*A;
601:   Vec            v;

604:   if (k==0) return(0);
605:   nw = 6*sr*k;
606:   lds = deg*ld;
607:   At = work+nwu;
608:   nwu += sr*k;
609:   Bt = work+nwu;
610:   nwu += k*k;
611:   PetscMemzero(Bt,k*k*sizeof(PetscScalar));
612:   Hj = work+nwu;
613:   nwu += k*k;
614:   Hp = work+nwu;
615:   nwu += k*k;
616:   PetscMemzero(Hp,k*k*sizeof(PetscScalar));
617:   PetscMalloc1(k,&p);
618:   PetscBLASIntCast(sr,&sr_);
619:   PetscBLASIntCast(k,&k_);
620:   PetscBLASIntCast(lds,&lds_);
621:   PetscBLASIntCast(ldh,&ldh_);
622:   STGetTransform(pep->st,&flg);
623:   if (!flg) {
624:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
625:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
626:   }
627:   if (transf) {
628:     ldt = k;
629:     T = work+nwu;
630:     nwu += k*k;
631:     for (i=0;i<k;i++) {
632:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
633:     }
634:     if (flg) {
635:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
636:       PetscBLASIntCast(nw-nwu,&lwork);
637:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work+nwu,&lwork,&info));
638:     }
639:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
640:   } else {
641:     T = H; ldt = ldh;
642:   }
643:   PetscBLASIntCast(ldt,&ldt_);
644:   switch (pep->extract) {
645:   case PEP_EXTRACT_NONE:
646:     break;
647:   case PEP_EXTRACT_NORM:
648:     if (pep->basis == PEP_BASIS_MONOMIAL) {
649:       PetscBLASIntCast(ldt,&ldt_);
650:       PetscMalloc1(k,&rwork);
651:       norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
652:       PetscFree(rwork);
653:       if (norm>1.0) idxcpy = d-1;
654:     } else {
655:       PetscBLASIntCast(ldt,&ldt_);
656:       PetscMalloc1(k,&rwork);
657:       maxnrm = 0.0;
658:       for (i=0;i<pep->nmat-1;i++) {
659:         PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
660:         norm = LAPACKlange_("F",&k_,&k_,Hj,&k_,rwork);
661:         if (norm > maxnrm) {
662:           idxcpy = i;
663:           maxnrm = norm;
664:         }
665:       }
666:       PetscFree(rwork);
667:     }
668:     if (idxcpy>0) {
669:       /* copy block idxcpy of S to the first one */
670:       for (j=0;j<k;j++) {
671:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
672:       }
673:     }
674:     break;
675:   case PEP_EXTRACT_RESIDUAL:
676:     STGetTransform(pep->st,&flg);
677:     if (flg) {
678:       PetscMalloc1(pep->nmat,&A);
679:       for (i=0;i<pep->nmat;i++) {
680:         STGetTOperators(pep->st,i,A+i);
681:       }
682:     } else A = pep->A;
683:     PetscMalloc1(pep->nmat-1,&R);
684:     for (i=0;i<pep->nmat-1;i++) {
685:       BVDuplicateResize(pep->V,k,R+i);
686:     }
687:     BVDuplicateResize(pep->V,sr,&Y);
688:     MatCreateSeqDense(PETSC_COMM_SELF,sr,k,NULL,&M);
689:     g = 0.0; a = 1.0;
690:     BVSetActiveColumns(pep->V,0,sr);
691:     for (j=0;j<pep->nmat;j++) {
692:       BVMatMult(pep->V,A[j],Y);
693:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
694:       for (i=0;i<pep->nmat-1;i++) {
695:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&k_,&k_,&a,S+i*ld,&lds_,Hj,&k_,&g,At,&sr_));
696:         MatDenseGetArray(M,&pM);
697:         for (jj=0;jj<k;jj++) {
698:           PetscMemcpy(pM+jj*sr,At+jj*sr,sr*sizeof(PetscScalar));
699:         }
700:         MatDenseRestoreArray(M,&pM);
701:         BVMult(R[i],1.0,(i==0)?0.0:1.0,Y,M);
702:       }
703:     }

705:     /* frobenius norm */
706:     maxnrm = 0.0;
707:     for (i=0;i<pep->nmat-1;i++) {
708:       norm = 0.0;
709:       for (j=0;j<k;j++) {
710:         BVGetColumn(R[i],j,&v);
711:         VecNorm(v,NORM_2,&nrm);
712:         BVRestoreColumn(R[i],j,&v);
713:         norm += nrm*nrm;
714:       }
715:       norm = PetscSqrtReal(norm);
716:       if (maxnrm > norm) {
717:         maxnrm = norm;
718:         idxcpy = i;
719:       }
720:     }
721:     if (idxcpy>0) {
722:       /* copy block idxcpy of S to the first one */
723:       for (j=0;j<k;j++) {
724:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
725:       }
726:     }
727:     if (flg) PetscFree(A);
728:     for (i=0;i<pep->nmat-1;i++) {
729:       BVDestroy(&R[i]);
730:     }
731:     PetscFree(R);
732:     BVDestroy(&Y);
733:     MatDestroy(&M);
734:     break;
735:   case PEP_EXTRACT_STRUCTURED:
736:     for (j=0;j<k;j++) Bt[j+j*k] = 1.0;
737:     for (j=0;j<sr;j++) {
738:       for (i=0;i<k;i++) At[j*k+i] = PetscConj(S[i*lds+j]);
739:     }
740:     PEPEvaluateBasisM(pep,k,T,ldt,0,&Hp,&Hj);
741:     for (i=1;i<deg;i++) {
742:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
743:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
744:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
745:     }
746:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
747:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
748:     for (j=0;j<sr;j++) {
749:       for (i=0;i<k;i++) S[i*lds+j] = PetscConj(At[j*k+i]);
750:     }
751:     break;
752:   default:
753:     SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Extraction not implemented in this solver");
754:   }
755:   PetscFree(p);
756:   return(0);
757: #endif
758: }

762: PetscErrorCode PEPSolve_TOAR(PEP pep)
763: {
765:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
766:   PetscInt       i,j,k,l,nv=0,ld,lds,off,ldds,newn,nq=ctx->nq,nconv=0,locked=0,newc;
767:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nmat=pep->nmat,deg=nmat-1;
768:   PetscScalar    *S,*Q,*work,*H,sigma;
769:   PetscReal      beta,*rwork;
770:   PetscBool      breakdown=PETSC_FALSE,flg,falselock=PETSC_FALSE,sinv=PETSC_FALSE;

773:   PetscCitationsRegister(citation,&cited);
774:   if (ctx->lock) {
775:     PetscOptionsGetBool(NULL,NULL,"-pep_toar_falselocking",&falselock,NULL);
776:   }
777:   ld = ctx->ld;
778:   S = ctx->S;
779:   lds = deg*ld;        /* leading dimension of S */
780:   lwa = (deg+6)*ld*lds;
781:   lrwa = 7*lds;
782:   PetscMalloc2(lwa,&work,lrwa,&rwork);
783:   DSGetLeadingDimension(pep->ds,&ldds);
784:   STGetShift(pep->st,&sigma);

786:   /* update polynomial basis coefficients */
787:   STGetTransform(pep->st,&flg);
788:   if (pep->sfactor!=1.0) {
789:     for (i=0;i<nmat;i++) {
790:       pep->pbc[nmat+i] /= pep->sfactor;
791:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor;
792:     }
793:     if (!flg) {
794:       pep->target /= pep->sfactor;
795:       RGPushScale(pep->rg,1.0/pep->sfactor);
796:       STScaleShift(pep->st,1.0/pep->sfactor);
797:       sigma /= pep->sfactor;
798:     } else {
799:       PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
800:       RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
801:       STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
802:     }
803:   }

805:   if (flg) sigma = 0.0;

807:   /* restart loop */
808:   l = 0;
809:   while (pep->reason == PEP_CONVERGED_ITERATING) {
810:     pep->its++;

812:     /* compute an nv-step Lanczos factorization */
813:     nv = PetscMax(PetscMin(nconv+pep->mpd,pep->ncv),nv);
814:     DSGetArray(pep->ds,DS_MAT_A,&H);
815:     PEPTOARrun(pep,sigma,&nq,S,ld,H,ldds,pep->nconv+l,&nv,&breakdown,work+nwu,pep->work);
816:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
817:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
818:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
819:     if (l==0) {
820:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
821:     } else {
822:       DSSetState(pep->ds,DS_STATE_RAW);
823:     }

825:     /* solve projected problem */
826:     DSSolve(pep->ds,pep->eigr,pep->eigi);
827:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
828:     DSUpdateExtraRow(pep->ds);

830:     /* check convergence */
831:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
832:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);

834:     /* update l */
835:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
836:     else {
837:       l = (nv==k)?0:PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
838:       if (!breakdown) {
839:         /* prepare the Rayleigh quotient for restart */
840:         DSTruncate(pep->ds,k+l);
841:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
842:         l = newn-k;
843:       }
844:     }
845:     nconv = k;
846:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

848:     /* update S */
849:     off = pep->nconv*ldds;
850:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
851:     PEPTOARSupdate(S,ld,deg,nq,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu);
852:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

854:     /* copy last column of S */
855:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

857:     if (breakdown) {
858:       /* stop if breakdown */
859:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
860:       pep->reason = PEP_DIVERGED_BREAKDOWN;
861:     }
862:     if (pep->reason != PEP_CONVERGED_ITERATING) {l--; flg = PETSC_TRUE;}
863:     else flg = PETSC_FALSE;
864:     /* truncate S */
865:     if (k+l+deg<nq) {
866:       if (!falselock && ctx->lock) {
867:         newc = k-pep->nconv;
868:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,locked,newc,flg,work+nwu,rwork+nrwu);
869:         locked += newc;
870:       } else {
871:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,0,0,flg,work+nwu,rwork+nrwu);
872:       }
873:     }
874:     pep->nconv = k;
875:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
876:   }
877:   if (pep->nconv>0) {
878:     /* {V*S_nconv^i}_{i=0}^{d-1} has rank nconv instead of nconv+d-1. Force zeros in each S_nconv^i block */
879:     nq = pep->nconv;

881:     /* perform Newton refinement if required */
882:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
883:       /* extract invariant pair */
884:       DSGetArray(pep->ds,DS_MAT_A,&H);
885:       PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds,work+nwu);
886:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
887:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
888:       DSSetState(pep->ds,DS_STATE_RAW);
889:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds,&nq);
890:       DSSolve(pep->ds,pep->eigr,pep->eigi);
891:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
892:       DSGetArray(pep->ds,DS_MAT_Q,&Q);
893:       PEPTOARSupdate(S,ld,deg,nq,0,pep->nconv,pep->nconv,Q,ldds,work+nwu);
894:       DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
895:     } else {
896:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
897:       DSSetState(pep->ds,DS_STATE_RAW);
898:     }
899:   }
900:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
901:     STGetTransform(pep->st,&flg);
902:     if (!flg) {
903:       if (pep->ops->backtransform) {
904:         (*pep->ops->backtransform)(pep);
905:       }
906:       /* restore original values */
907:       pep->target *= pep->sfactor;
908:       STScaleShift(pep->st,pep->sfactor);
909:     } else {
910:       STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
911:     }
912:     if (pep->sfactor!=1.0) {
913:       for (j=0;j<pep->nconv;j++) {
914:         pep->eigr[j] *= pep->sfactor;
915:         pep->eigi[j] *= pep->sfactor;
916:       }
917:       /* restore original values */
918:       for (i=0;i<pep->nmat;i++){
919:         pep->pbc[pep->nmat+i] *= pep->sfactor;
920:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
921:       }
922:     }
923:   }
924:   if (pep->sfactor!=1.0) { RGPopScale(pep->rg); }

926:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
927:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
928:   DSSetState(pep->ds,DS_STATE_RAW);

930:   PetscFree2(work,rwork);
931:   return(0);
932: }

936: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
937: {
938:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

941:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
942:   else {
943:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
944:     ctx->keep = keep;
945:   }
946:   return(0);
947: }

951: /*@
952:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
953:    method, in particular the proportion of basis vectors that must be kept
954:    after restart.

956:    Logically Collective on PEP

958:    Input Parameters:
959: +  pep  - the eigenproblem solver context
960: -  keep - the number of vectors to be kept at restart

962:    Options Database Key:
963: .  -pep_toar_restart - Sets the restart parameter

965:    Notes:
966:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

968:    Level: advanced

970: .seealso: PEPTOARGetRestart()
971: @*/
972: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
973: {

979:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
980:   return(0);
981: }

985: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
986: {
987:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

990:   *keep = ctx->keep;
991:   return(0);
992: }

996: /*@
997:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

999:    Not Collective

1001:    Input Parameter:
1002: .  pep - the eigenproblem solver context

1004:    Output Parameter:
1005: .  keep - the restart parameter

1007:    Level: advanced

1009: .seealso: PEPTOARSetRestart()
1010: @*/
1011: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
1012: {

1018:   PetscUseMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
1019:   return(0);
1020: }

1024: static PetscErrorCode PEPTOARSetLocking_TOAR(PEP pep,PetscBool lock)
1025: {
1026:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1029:   ctx->lock = lock;
1030:   return(0);
1031: }

1035: /*@
1036:    PEPTOARSetLocking - Choose between locking and non-locking variants of
1037:    the TOAR method.

1039:    Logically Collective on PEP

1041:    Input Parameters:
1042: +  pep  - the eigenproblem solver context
1043: -  lock - true if the locking variant must be selected

1045:    Options Database Key:
1046: .  -pep_toar_locking - Sets the locking flag

1048:    Notes:
1049:    The default is to lock converged eigenpairs when the method restarts.
1050:    This behaviour can be changed so that all directions are kept in the
1051:    working subspace even if already converged to working accuracy (the
1052:    non-locking variant).

1054:    Level: advanced

1056: .seealso: PEPTOARGetLocking()
1057: @*/
1058: PetscErrorCode PEPTOARSetLocking(PEP pep,PetscBool lock)
1059: {

1065:   PetscTryMethod(pep,"PEPTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
1066:   return(0);
1067: }

1071: static PetscErrorCode PEPTOARGetLocking_TOAR(PEP pep,PetscBool *lock)
1072: {
1073:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1076:   *lock = ctx->lock;
1077:   return(0);
1078: }

1082: /*@
1083:    PEPTOARGetLocking - Gets the locking flag used in the TOAR method.

1085:    Not Collective

1087:    Input Parameter:
1088: .  pep - the eigenproblem solver context

1090:    Output Parameter:
1091: .  lock - the locking flag

1093:    Level: advanced

1095: .seealso: PEPTOARSetLocking()
1096: @*/
1097: PetscErrorCode PEPTOARGetLocking(PEP pep,PetscBool *lock)
1098: {

1104:   PetscUseMethod(pep,"PEPTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
1105:   return(0);
1106: }

1110: PetscErrorCode PEPSetFromOptions_TOAR(PetscOptionItems *PetscOptionsObject,PEP pep)
1111: {
1113:   PetscBool      flg,lock;
1114:   PetscReal      keep;

1117:   PetscOptionsHead(PetscOptionsObject,"PEP TOAR Options");
1118:   PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
1119:   if (flg) {
1120:     PEPTOARSetRestart(pep,keep);
1121:   }
1122:   PetscOptionsBool("-pep_toar_locking","Choose between locking and non-locking variants","PEPTOARSetLocking",PETSC_FALSE,&lock,&flg);
1123:   if (flg) {
1124:     PEPTOARSetLocking(pep,lock);
1125:   }
1126:   PetscOptionsTail();
1127:   return(0);
1128: }

1132: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
1133: {
1135:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
1136:   PetscBool      isascii;

1139:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1140:   if (isascii) {
1141:     PetscViewerASCIIPrintf(viewer,"  TOAR: %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
1142:     PetscViewerASCIIPrintf(viewer,"  TOAR: using the %slocking variant\n",ctx->lock?"":"non-");
1143:   }
1144:   return(0);
1145: }

1149: PetscErrorCode PEPDestroy_TOAR(PEP pep)
1150: {

1154:   PetscFree(pep->data);
1155:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
1156:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
1157:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",NULL);
1158:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",NULL);
1159:   return(0);
1160: }

1164: PETSC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
1165: {
1166:   PEP_TOAR       *ctx;

1170:   PetscNewLog(pep,&ctx);
1171:   pep->data = (void*)ctx;
1172:   ctx->lock = PETSC_TRUE;

1174:   pep->ops->solve          = PEPSolve_TOAR;
1175:   pep->ops->setup          = PEPSetUp_TOAR;
1176:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
1177:   pep->ops->destroy        = PEPDestroy_TOAR;
1178:   pep->ops->view           = PEPView_TOAR;
1179:   pep->ops->backtransform  = PEPBackTransform_Default;
1180:   pep->ops->computevectors = PEPComputeVectors_Default;
1181:   pep->ops->extractvectors = PEPExtractVectors_TOAR;
1182:   pep->ops->reset          = PEPReset_TOAR;
1183:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
1184:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
1185:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",PEPTOARSetLocking_TOAR);
1186:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",PEPTOARGetLocking_TOAR);
1187:   return(0);
1188: }