Actual source code: ptoar.c

slepc-3.8.2 2017-12-01
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc polynomial eigensolver: "toar"

 13:    Method: TOAR

 15:    Algorithm:

 17:        Two-Level Orthogonal Arnoldi.

 19:    References:

 21:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 22:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 24:        [2] C. Campos and J.E. Roman, "Parallel Krylov solvers for the
 25:            polynomial eigenvalue problem in SLEPc", SIAM J. Sci. Comput.
 26:            to appear, 2016.
 27: */

 29: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 30: #include "../src/pep/impls/krylov/pepkrylov.h"
 31: #include <slepcblaslapack.h>

 33: static PetscBool  cited = PETSC_FALSE;
 34: static const char citation[] =
 35:   "@Article{slepc-pep,\n"
 36:   "   author = \"C. Campos and J. E. Roman\",\n"
 37:   "   title = \"Parallel {Krylov} solvers for the polynomial eigenvalue problem in {SLEPc}\",\n"
 38:   "   journal = \"{SIAM} J. Sci. Comput.\",\n"
 39:   "   volume = \"38\",\n"
 40:   "   number = \"5\",\n"
 41:   "   pages = \"S385--S411\",\n"
 42:   "   year = \"2016,\"\n"
 43:   "   doi = \"https://doi.org/10.1137/15M1022458\"\n"
 44:   "}\n";

 46: /*
 47:   Norm of [sp;sq]
 48: */
 49: static PetscErrorCode PEPTOARSNorm2(PetscInt n,PetscScalar *S,PetscReal *norm)
 50: {
 52:   PetscBLASInt   n_,one=1;

 55:   PetscBLASIntCast(n,&n_);
 56:   *norm = BLASnrm2_(&n_,S,&one);
 57:   return(0);
 58: }

 60: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 61: {
 63:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 64:   PetscBool      shift,sinv,flg;
 65:   PetscInt       i,lds;

 68:   pep->lineariz = PETSC_TRUE;
 69:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 70:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
 71:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv);

 73:   PetscObjectTypeCompare((PetscObject)pep->st,STSHIFT,&shift);
 74:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 75:   if (!shift && !sinv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Only STSHIFT and STSINVERT spectral transformations can be used");
 76:   if (!pep->which) {
 77:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 78:     else pep->which = PEP_LARGEST_MAGNITUDE;
 79:   }
 80:   if (pep->problem_type!=PEP_GENERAL) {
 81:     PetscInfo(pep,"Problem type ignored, performing a non-symmetric linearization\n");
 82:   }

 84:   if (!ctx->keep) ctx->keep = 0.5;

 86:   PEPAllocateSolution(pep,pep->nmat-1);
 87:   PEPSetWorkVecs(pep,3);
 88:   DSSetType(pep->ds,DSNHEP);
 89:   DSSetExtraRow(pep->ds,PETSC_TRUE);
 90:   DSAllocate(pep->ds,pep->ncv+1);

 92:   PEPBasisCoefficients(pep,pep->pbc);
 93:   STGetTransform(pep->st,&flg);
 94:   if (!flg) {
 95:     PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
 96:     PetscLogObjectMemory((PetscObject)pep,pep->nmat*sizeof(PetscScalar));
 97:     if (sinv) {
 98:       PEPEvaluateBasis(pep,pep->target,0,pep->solvematcoeffs,NULL);
 99:     } else {
100:       for (i=0;i<pep->nmat-1;i++) pep->solvematcoeffs[i] = 0.0;
101:       pep->solvematcoeffs[pep->nmat-1] = 1.0;
102:     }
103:   }
104:   ctx->ld = pep->ncv+(pep->nmat-1);   /* number of rows of each fragment of S */
105:   lds = (pep->nmat-1)*ctx->ld;
106:   if (ctx->S) { PetscFree(ctx->S); }
107:   PetscCalloc1(lds*ctx->ld,&ctx->S);
108:   return(0);
109: }

111: /*
112:  Computes GS orthogonalization   [z;x] - [Sp;Sq]*y,
113:  where y = ([Sp;Sq]'*[z;x]).
114:    k: Column from S to be orthogonalized against previous columns.
115:    Sq = Sp+ld
116:    dim(work)>=k
117: */
118: static PetscErrorCode PEPTOAROrth2(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt k,PetscScalar *y,PetscReal *norm,PetscBool *lindep,PetscScalar *work)
119: {
121:   PetscBLASInt   n_,lds_,k_,one=1;
122:   PetscScalar    sonem=-1.0,sone=1.0,szero=0.0,*x0,*x,*c;
123:   PetscInt       i,lds=deg*ld,n;
124:   PetscReal      eta,onorm;

127:   BVGetOrthogonalization(pep->V,NULL,NULL,&eta,NULL);
128:   n = k+deg-1;
129:   PetscBLASIntCast(n,&n_);
130:   PetscBLASIntCast(deg*ld,&lds_);
131:   PetscBLASIntCast(k,&k_); /* number of vectors to orthogonalize against them */
132:   c = work;
133:   x0 = S+k*lds;
134:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,y,&one));
135:   for (i=1;i<deg;i++) {
136:     x = S+i*ld+k*lds;
137:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,y,&one));
138:   }
139:   for (i=0;i<deg;i++) {
140:     x= S+i*ld+k*lds;
141:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,y,&one,&sone,x,&one));
142:   }
143:   PEPTOARSNorm2(lds,S+k*lds,&onorm);
144:   /* twice */
145:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,c,&one));
146:   for (i=1;i<deg;i++) {
147:     x = S+i*ld+k*lds;
148:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,c,&one));
149:   }
150:   for (i=0;i<deg;i++) {
151:     x= S+i*ld+k*lds;
152:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,c,&one,&sone,x,&one));
153:   }
154:   for (i=0;i<k;i++) y[i] += c[i];
155:   if (norm) {
156:     PEPTOARSNorm2(lds,S+k*lds,norm);
157:     if (lindep) *lindep = (*norm < eta * onorm)?PETSC_TRUE:PETSC_FALSE;
158:   }
159:   return(0);
160: }

162: /*
163:   Extend the TOAR basis by applying the the matrix operator
164:   over a vector which is decomposed in the TOAR way
165:   Input:
166:     - pbc: array containing the polynomial basis coefficients
167:     - S,V: define the latest Arnoldi vector (nv vectors in V)
168:   Output:
169:     - t: new vector extending the TOAR basis
170:     - r: temporary coefficients to compute the TOAR coefficients
171:          for the new Arnoldi vector
172:   Workspace: t_ (two vectors)
173: */
174: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_)
175: {
177:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
178:   Vec            v=t_[0],ve=t_[1],q=t_[2];
179:   PetscScalar    alpha=1.0,*ss,a;
180:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
181:   PetscBool      flg;

184:   BVSetActiveColumns(pep->V,0,nv);
185:   STGetTransform(pep->st,&flg);
186:   if (sinvert) {
187:     for (j=0;j<nv;j++) {
188:       if (deg>1) r[lr+j] = S[j]/ca[0];
189:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
190:     }
191:     for (k=2;k<deg-1;k++) {
192:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
193:     }
194:     k = deg-1;
195:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
196:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
197:   } else {
198:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
199:   }
200:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
201:   if (pep->Dr) { /* balancing */
202:     VecPointwiseMult(v,v,pep->Dr);
203:   }
204:   STMatMult(pep->st,off,v,q);
205:   VecScale(q,a);
206:   for (j=1+off;j<deg+off-1;j++) {
207:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
208:     if (pep->Dr) {
209:       VecPointwiseMult(v,v,pep->Dr);
210:     }
211:     STMatMult(pep->st,j,v,t);
212:     a *= pep->sfactor;
213:     VecAXPY(q,a,t);
214:   }
215:   if (sinvert) {
216:     BVMultVec(V,1.0,0.0,v,ss);
217:     if (pep->Dr) {
218:       VecPointwiseMult(v,v,pep->Dr);
219:     }
220:     STMatMult(pep->st,deg,v,t);
221:     a *= pep->sfactor;
222:     VecAXPY(q,a,t);
223:   } else {
224:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
225:     if (pep->Dr) {
226:       VecPointwiseMult(ve,ve,pep->Dr);
227:     }
228:     a *= pep->sfactor;
229:     STMatMult(pep->st,deg-1,ve,t);
230:     VecAXPY(q,a,t);
231:     a *= pep->sfactor;
232:   }
233:   if (flg || !sinvert) alpha /= a;
234:   STMatSolve(pep->st,q,t);
235:   VecScale(t,alpha);
236:   if (!sinvert) {
237:     if (cg[deg-1]!=0) { VecAXPY(t,cg[deg-1],v); }
238:     if (cb[deg-1]!=0) { VecAXPY(t,cb[deg-1],ve); }
239:   }
240:   if (pep->Dr) {
241:     VecPointwiseDivide(t,t,pep->Dr);
242:   }
243:   return(0);
244: }

246: /*
247:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
248: */
249: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
250: {
251:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
252:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
253:   PetscScalar t=1.0,tp=0.0,tt;

256:   if (sinvert) {
257:     for (k=1;k<d;k++) {
258:       tt = t;
259:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
260:       tp = tt;
261:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
262:     }
263:   } else {
264:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
265:     for (k=1;k<d-1;k++) {
266:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
267:     }
268:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
269:   }
270:   return(0);
271: }

273: /*
274:   Compute a run of Arnoldi iterations dim(work)=ld
275: */
276: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscInt *nq,PetscScalar *S,PetscInt ld,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscScalar *work,Vec *t_)
277: {
279:   PetscInt       i,j,p,m=*M,nwu=0,deg=pep->nmat-1;
280:   PetscInt       lds=ld*deg,nqt=*nq;
281:   Vec            t;
282:   PetscReal      norm;
283:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
284:   PetscScalar    *x;

287:   STGetTransform(pep->st,&flg);
288:   if (!flg) {
289:     /* spectral transformation handled by the solver */
290:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
291:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"ST type not supported for TOAR without transforming matrices");
292:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
293:   }
294:   for (j=k;j<m;j++) {
295:     /* apply operator */
296:     BVGetColumn(pep->V,nqt,&t);
297:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_);
298:     BVRestoreColumn(pep->V,nqt,&t);

300:     /* orthogonalize */
301:     if (sinvert) x = S+(j+1)*lds;
302:     else x = S+(deg-1)*ld+(j+1)*lds;
303:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
304:     if (!lindep) {
305:       x[nqt] = norm;
306:       BVScaleColumn(pep->V,nqt,1.0/norm);
307:       nqt++;
308:     }

310:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);
311:     /* level-2 orthogonalization */
312:     PEPTOAROrth2(pep,S,ld,deg,j+1,H+j*ldh,&norm,breakdown,work+nwu);
313:     H[j+1+ldh*j] = norm;
314:     *nq = nqt;
315:     if (*breakdown) {
316:       *M = j+1;
317:       break;
318:     }
319:     for (p=0;p<deg;p++) {
320:       for (i=0;i<=j+deg;i++) {
321:         S[i+p*ld+(j+1)*lds] /= norm;
322:       }
323:     }
324:   }
325:   return(0);
326: }

328: /*
329:   dim(rwork)=6*n; dim(work)=6*ld*lds+2*cs1
330: */
331: static PetscErrorCode PEPTOARTrunc(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt *rs1a,PetscInt cs1,PetscInt lock,PetscInt newc,PetscBool final,PetscScalar *work,PetscReal *rwork)
332: {
333: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
335:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD/GEQRF/ORGQR - Lapack routine is unavailable");
336: #else
338:   PetscInt       nwu=0,nrwu=0,nnc,nrow,lwa;
339:   PetscInt       j,i,k,n,lds=deg*ld,rs1=*rs1a,rk=0,offu;
340:   PetscScalar    *M,*V,*pU,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau;
341:   PetscReal      *sg,tol;
342:   PetscBLASInt   cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
343:   Mat            U;

346:   if (cs1==0) return(0);
347:   lwa = 6*ld*lds+2*cs1;
348:   n = (rs1>deg*cs1)?deg*cs1:rs1;
349:   nnc = cs1-lock-newc;
350:   nrow = rs1-lock;
351:   PetscMalloc4(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pU,deg*rs1,&tau);
352:   offu = lock*(rs1+1);
353:   M = work+nwu;
354:   nwu += rs1*cs1*deg;
355:   sg = rwork+nrwu;
356:   nrwu += n;
357:   PetscMemzero(pU,rs1*n*sizeof(PetscScalar));
358:   V = work+nwu;
359:   nwu += deg*cs1*n;
360:   PetscBLASIntCast(n,&n_);
361:   PetscBLASIntCast(nnc,&nnc_);
362:   PetscBLASIntCast(cs1,&cs1_);
363:   PetscBLASIntCast(rs1,&rs1_);
364:   PetscBLASIntCast(newc,&newc_);
365:   PetscBLASIntCast(newc*deg,&newctdeg);
366:   PetscBLASIntCast(nnc*deg,&nnctdeg);
367:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
368:   PetscBLASIntCast(lwa-nwu,&lw_);
369:   PetscBLASIntCast(nrow,&nrow_);
370:   PetscBLASIntCast(lds,&lds_);
371:   if (newc>0) {
372:   /* truncate columns associated with new converged eigenpairs */
373:     for (j=0;j<deg;j++) {
374:       for (i=lock;i<lock+newc;i++) {
375:         PetscMemcpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
376:       }
377:     }
378: #if !defined (PETSC_USE_COMPLEX)
379:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,&info));
380: #else
381:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
382: #endif
383:     SlepcCheckLapackInfo("gesvd",info);
384:     /* SVD has rank min(newc,nrow) */
385:     rk = PetscMin(newc,nrow);
386:     for (i=0;i<rk;i++) {
387:       t = sg[i];
388:       PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,V+i,&n_));
389:     }
390:     for (i=0;i<deg;i++) {
391:       for (j=lock;j<lock+newc;j++) {
392:         PetscMemcpy(S+j*lds+i*ld+lock,V+(newc*i+j-lock)*n,rk*sizeof(PetscScalar));
393:         PetscMemzero(S+j*lds+i*ld+lock+rk,(ld-lock-rk)*sizeof(PetscScalar));
394:       }
395:     }
396:     /*
397:       update columns associated with non-converged vectors, orthogonalize
398:        against pU so that next M has rank nnc+d-1 insted of nrow+d-1
399:     */
400:     for (i=0;i<deg;i++) {
401:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
402:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
403:       /* repeat orthogonalization step */
404:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS2,&newc_));
405:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
406:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
407:     }
408:   }
409:   /* truncate columns associated with non-converged eigenpairs */
410:   for (j=0;j<deg;j++) {
411:     for (i=lock+newc;i<cs1;i++) {
412:       PetscMemcpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
413:     }
414:   }
415: #if !defined (PETSC_USE_COMPLEX)
416:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,&info));
417: #else
418:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
419: #endif
420:   SlepcCheckLapackInfo("gesvd",info);
421:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
422:   for (i=0;i<PetscMin(n_,nnctdeg);i++) if (sg[i]>tol) rk++;
423:   rk = PetscMin(nnc+deg-1,rk);
424:   /* the SVD has rank (atmost) nnc+deg-1 */
425:   for (i=0;i<rk;i++) {
426:     t = sg[i];
427:     PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,V+i,&n_));
428:   }
429:   /* update S */
430:   PetscMemzero(S+cs1*lds,(ld-cs1)*lds*sizeof(PetscScalar));
431:   k = ld-lock-newc-rk;
432:   for (i=0;i<deg;i++) {
433:     for (j=lock+newc;j<cs1;j++) {
434:       PetscMemcpy(S+j*lds+i*ld+lock+newc,V+(nnc*i+j-lock-newc)*n,rk*sizeof(PetscScalar));
435:       PetscMemzero(S+j*lds+i*ld+lock+newc+rk,k*sizeof(PetscScalar));
436:     }
437:   }
438:   if (newc>0) {
439:     for (i=0;i<deg;i++) {
440:       p = SS+nnc*newc*i;
441:       for (j=lock+newc;j<cs1;j++) {
442:         for (k=0;k<newc;k++) S[j*lds+i*ld+lock+k] = *(p++);
443:       }
444:     }
445:   }

447:   /* orthogonalize pU */
448:   rk = rk+newc;
449:   PetscBLASIntCast(rk,&rk_);
450:   PetscBLASIntCast(cs1-lock,&nnc_);
451:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));
452:   SlepcCheckLapackInfo("geqrf",info);
453:   for (i=0;i<deg;i++) {
454:     PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pU+offu,&rs1_,S+lock*lds+lock+i*ld,&lds_));
455:   }
456:   PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&nrow_,&rk_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));
457:   SlepcCheckLapackInfo("ungqr",info);

459:   /* update vectors V(:,idx) = V*Q(:,idx) */
460:   rk = rk+lock;
461:   for (i=0;i<lock;i++) pU[(i+1)*rs1] = 1.0;
462:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pU,&U);
463:   BVSetActiveColumns(pep->V,lock,rs1);
464:   BVMultInPlace(pep->V,U,lock,rk);
465:   BVSetActiveColumns(pep->V,0,rk);
466:   MatDestroy(&U);
467:   *rs1a = rk;

469:   /* free work space */
470:   PetscFree4(SS,SS2,pU,tau);
471:   return(0);
472: #endif
473: }

475: /*
476:   S <- S*Q
477:   columns s-s+ncu of S
478:   rows 0-sr of S
479:   size(Q) qr x ncu
480:   dim(work)=sr*ncu
481: */
482: static PetscErrorCode PEPTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work)
483: {
485:   PetscScalar    a=1.0,b=0.0;
486:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
487:   PetscInt       j,lds=deg*ld,i;

490:   PetscBLASIntCast(sr,&sr_);
491:   PetscBLASIntCast(qr,&qr_);
492:   PetscBLASIntCast(ncu,&ncu_);
493:   PetscBLASIntCast(lds,&lds_);
494:   PetscBLASIntCast(ldq,&ldq_);
495:   for (i=0;i<deg;i++) {
496:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+i*ld,&lds_,Q,&ldq_,&b,work,&sr_));
497:     for (j=0;j<ncu;j++) {
498:       PetscMemcpy(S+lds*(s+j)+i*ld,work+j*sr,sr*sizeof(PetscScalar));
499:     }
500:   }
501:   return(0);
502: }

504: /*
505:   Computes T_j = phi_idx(T). In T_j and T_p are phi_{idx-1}(T)
506:    and phi_{idx-2}(T) respectively or null if idx=0,1.
507:    Tp and Tj are input/output arguments
508: */
509: static PetscErrorCode PEPEvaluateBasisM(PEP pep,PetscInt k,PetscScalar *T,PetscInt ldt,PetscInt idx,PetscScalar **Tp,PetscScalar **Tj)
510: {
512:   PetscInt       i;
513:   PetscReal      *ca,*cb,*cg;
514:   PetscScalar    *pt,g,a;
515:   PetscBLASInt   k_,ldt_;

518:   if (idx==0) {
519:     PetscMemzero(*Tj,k*k*sizeof(PetscScalar));
520:     PetscMemzero(*Tp,k*k*sizeof(PetscScalar));
521:     for (i=0;i<k;i++) (*Tj)[i+i*k] = 1.0;
522:   } else {
523:     PetscBLASIntCast(ldt,&ldt_);
524:     PetscBLASIntCast(k,&k_);
525:     ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
526:     for (i=0;i<k;i++) T[i*ldt+i] -= cb[idx-1];
527:     a = 1/ca[idx-1];
528:     g = (idx==1)?0.0:-cg[idx-1]/ca[idx-1];
529:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,*Tj,&k_,&g,*Tp,&k_));
530:     pt = *Tj; *Tj = *Tp; *Tp = pt;
531:     for (i=0;i<k;i++) T[i*ldt+i] += cb[idx-1];
532:   }
533:   return(0);
534: }

536: /* dim(work)=6*sr*k;*/
537: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh,PetscScalar *work)
538: {
539: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(PETSC_MISSING_LAPACK_GETRF)
541:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/GETRI/GETRF - Lapack routine is unavailable");
542: #else
544:   PetscInt       nw,i,j,jj,nwu=0,lds,ldt,d=pep->nmat-1,idxcpy=0;
545:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,sone=1.0,g,a,*pM;
546:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
547:   PetscBool      transf=PETSC_FALSE,flg;
548:   PetscReal      nrm,norm,maxnrm,*rwork;
549:   BV             *R,Y;
550:   Mat            M,*A;
551:   Vec            v;

554:   if (k==0) return(0);
555:   nw = 6*sr*k;
556:   lds = deg*ld;
557:   At = work+nwu;
558:   nwu += sr*k;
559:   Bt = work+nwu;
560:   nwu += k*k;
561:   PetscMemzero(Bt,k*k*sizeof(PetscScalar));
562:   Hj = work+nwu;
563:   nwu += k*k;
564:   Hp = work+nwu;
565:   nwu += k*k;
566:   PetscMemzero(Hp,k*k*sizeof(PetscScalar));
567:   PetscMalloc1(k,&p);
568:   PetscBLASIntCast(sr,&sr_);
569:   PetscBLASIntCast(k,&k_);
570:   PetscBLASIntCast(lds,&lds_);
571:   PetscBLASIntCast(ldh,&ldh_);
572:   STGetTransform(pep->st,&flg);
573:   if (!flg) {
574:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
575:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
576:   }
577:   if (transf) {
578:     ldt = k;
579:     T = work+nwu;
580:     nwu += k*k;
581:     for (i=0;i<k;i++) {
582:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
583:     }
584:     if (flg) {
585:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
586:       SlepcCheckLapackInfo("getrf",info);
587:       PetscBLASIntCast(nw-nwu,&lwork);
588:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work+nwu,&lwork,&info));
589:       SlepcCheckLapackInfo("getri",info);
590:     }
591:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
592:   } else {
593:     T = H; ldt = ldh;
594:   }
595:   PetscBLASIntCast(ldt,&ldt_);
596:   switch (pep->extract) {
597:   case PEP_EXTRACT_NONE:
598:     break;
599:   case PEP_EXTRACT_NORM:
600:     if (pep->basis == PEP_BASIS_MONOMIAL) {
601:       PetscBLASIntCast(ldt,&ldt_);
602:       PetscMalloc1(k,&rwork);
603:       norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
604:       PetscFree(rwork);
605:       if (norm>1.0) idxcpy = d-1;
606:     } else {
607:       PetscBLASIntCast(ldt,&ldt_);
608:       PetscMalloc1(k,&rwork);
609:       maxnrm = 0.0;
610:       for (i=0;i<pep->nmat-1;i++) {
611:         PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
612:         norm = LAPACKlange_("F",&k_,&k_,Hj,&k_,rwork);
613:         if (norm > maxnrm) {
614:           idxcpy = i;
615:           maxnrm = norm;
616:         }
617:       }
618:       PetscFree(rwork);
619:     }
620:     if (idxcpy>0) {
621:       /* copy block idxcpy of S to the first one */
622:       for (j=0;j<k;j++) {
623:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
624:       }
625:     }
626:     break;
627:   case PEP_EXTRACT_RESIDUAL:
628:     STGetTransform(pep->st,&flg);
629:     if (flg) {
630:       PetscMalloc1(pep->nmat,&A);
631:       for (i=0;i<pep->nmat;i++) {
632:         STGetMatrixTransformed(pep->st,i,A+i);
633:       }
634:     } else A = pep->A;
635:     PetscMalloc1(pep->nmat-1,&R);
636:     for (i=0;i<pep->nmat-1;i++) {
637:       BVDuplicateResize(pep->V,k,R+i);
638:     }
639:     BVDuplicateResize(pep->V,sr,&Y);
640:     MatCreateSeqDense(PETSC_COMM_SELF,sr,k,NULL,&M);
641:     g = 0.0; a = 1.0;
642:     BVSetActiveColumns(pep->V,0,sr);
643:     for (j=0;j<pep->nmat;j++) {
644:       BVMatMult(pep->V,A[j],Y);
645:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
646:       for (i=0;i<pep->nmat-1;i++) {
647:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&k_,&k_,&a,S+i*ld,&lds_,Hj,&k_,&g,At,&sr_));
648:         MatDenseGetArray(M,&pM);
649:         for (jj=0;jj<k;jj++) {
650:           PetscMemcpy(pM+jj*sr,At+jj*sr,sr*sizeof(PetscScalar));
651:         }
652:         MatDenseRestoreArray(M,&pM);
653:         BVMult(R[i],1.0,(i==0)?0.0:1.0,Y,M);
654:       }
655:     }

657:     /* frobenius norm */
658:     maxnrm = 0.0;
659:     for (i=0;i<pep->nmat-1;i++) {
660:       norm = 0.0;
661:       for (j=0;j<k;j++) {
662:         BVGetColumn(R[i],j,&v);
663:         VecNorm(v,NORM_2,&nrm);
664:         BVRestoreColumn(R[i],j,&v);
665:         norm += nrm*nrm;
666:       }
667:       norm = PetscSqrtReal(norm);
668:       if (maxnrm > norm) {
669:         maxnrm = norm;
670:         idxcpy = i;
671:       }
672:     }
673:     if (idxcpy>0) {
674:       /* copy block idxcpy of S to the first one */
675:       for (j=0;j<k;j++) {
676:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
677:       }
678:     }
679:     if (flg) PetscFree(A);
680:     for (i=0;i<pep->nmat-1;i++) {
681:       BVDestroy(&R[i]);
682:     }
683:     PetscFree(R);
684:     BVDestroy(&Y);
685:     MatDestroy(&M);
686:     break;
687:   case PEP_EXTRACT_STRUCTURED:
688:     for (j=0;j<k;j++) Bt[j+j*k] = 1.0;
689:     for (j=0;j<sr;j++) {
690:       for (i=0;i<k;i++) At[j*k+i] = PetscConj(S[i*lds+j]);
691:     }
692:     PEPEvaluateBasisM(pep,k,T,ldt,0,&Hp,&Hj);
693:     for (i=1;i<deg;i++) {
694:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
695:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
696:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
697:     }
698:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
699:     SlepcCheckLapackInfo("gesv",info);
700:     for (j=0;j<sr;j++) {
701:       for (i=0;i<k;i++) S[i*lds+j] = PetscConj(At[j*k+i]);
702:     }
703:     break;
704:   }
705:   PetscFree(p);
706:   return(0);
707: #endif
708: }

710: PetscErrorCode PEPSolve_TOAR(PEP pep)
711: {
713:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
714:   PetscInt       i,j,k,l,nv=0,ld,lds,off,ldds,newn,nq=0,nconv=0,locked=0,newc;
715:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nmat=pep->nmat,deg=nmat-1;
716:   PetscScalar    *S,*Q,*work,*H,sigma;
717:   PetscReal      beta,*rwork,norm;
718:   PetscBool      breakdown=PETSC_FALSE,flg,falselock=PETSC_FALSE,sinv=PETSC_FALSE;

721:   PetscCitationsRegister(citation,&cited);
722:   if (ctx->lock) {
723:     PetscOptionsGetBool(NULL,NULL,"-pep_toar_falselocking",&falselock,NULL);
724:   }
725:   ld = ctx->ld;
726:   S = ctx->S;
727:   lds = deg*ld;        /* leading dimension of S */
728:   lwa = (deg+6)*ld*lds;
729:   lrwa = 7*lds;
730:   PetscMalloc2(lwa,&work,lrwa,&rwork);
731:   DSGetLeadingDimension(pep->ds,&ldds);
732:   STGetShift(pep->st,&sigma);

734:   /* update polynomial basis coefficients */
735:   STGetTransform(pep->st,&flg);
736:   if (pep->sfactor!=1.0) {
737:     for (i=0;i<nmat;i++) {
738:       pep->pbc[nmat+i] /= pep->sfactor;
739:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor;
740:     }
741:     if (!flg) {
742:       pep->target /= pep->sfactor;
743:       RGPushScale(pep->rg,1.0/pep->sfactor);
744:       STScaleShift(pep->st,1.0/pep->sfactor);
745:       sigma /= pep->sfactor;
746:     } else {
747:       PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
748:       pep->target = sinv?pep->target*pep->sfactor:pep->target/pep->sfactor;
749:       RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
750:       STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
751:     }
752:   }

754:   if (flg) sigma = 0.0;

756:   /* clean projected matrix (including the extra-arrow) */
757:   DSGetArray(pep->ds,DS_MAT_A,&H);
758:   PetscMemzero(H,ldds*ldds*sizeof(PetscScalar));
759:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
760:   
761:   /* Get the starting Arnoldi vector */
762:   for (i=0;i<deg;i++) {
763:     if (i>=pep->nini) {
764:       BVSetRandomColumn(pep->V,nq);
765:     } else {
766:       BVCopyColumn(pep->V,i,nq);
767:     }
768:     BVOrthogonalizeColumn(pep->V,nq,S+i*ctx->ld,&norm,&breakdown);
769:     if (!breakdown) {
770:       BVScaleColumn(pep->V,nq,1.0/norm);
771:       S[nq+i*ctx->ld] = norm;
772:       nq++;
773:     }
774:   }
775:   if (nq==0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
776:   PEPTOARSNorm2(lds,S,&norm);
777:   for (j=0;j<deg;j++) {
778:     for (i=0;i<=j;i++) S[i+j*ctx->ld] /= norm;
779:   }

781:   /* restart loop */
782:   l = 0;
783:   while (pep->reason == PEP_CONVERGED_ITERATING) {
784:     pep->its++;

786:     /* compute an nv-step Lanczos factorization */
787:     nv = PetscMax(PetscMin(nconv+pep->mpd,pep->ncv),nv);
788:     DSGetArray(pep->ds,DS_MAT_A,&H);
789:     PEPTOARrun(pep,sigma,&nq,S,ld,H,ldds,pep->nconv+l,&nv,&breakdown,work+nwu,pep->work);
790:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
791:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
792:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
793:     if (l==0) {
794:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
795:     } else {
796:       DSSetState(pep->ds,DS_STATE_RAW);
797:     }

799:     /* solve projected problem */
800:     DSSolve(pep->ds,pep->eigr,pep->eigi);
801:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
802:     DSUpdateExtraRow(pep->ds);
803:     DSSynchronize(pep->ds,pep->eigr,pep->eigi);

805:     /* check convergence */
806:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
807:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);

809:     /* update l */
810:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
811:     else {
812:       l = (nv==k)?0:PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
813:       if (!breakdown) {
814:         /* prepare the Rayleigh quotient for restart */
815:         DSTruncate(pep->ds,k+l);
816:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
817:         l = newn-k;
818:       }
819:     }
820:     nconv = k;
821:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

823:     /* update S */
824:     off = pep->nconv*ldds;
825:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
826:     PEPTOARSupdate(S,ld,deg,nq,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu);
827:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

829:     /* copy last column of S */
830:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

832:     if (breakdown && pep->reason == PEP_CONVERGED_ITERATING) {
833:       /* stop if breakdown */
834:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
835:       pep->reason = PEP_DIVERGED_BREAKDOWN;
836:     }
837:     if (pep->reason != PEP_CONVERGED_ITERATING) {l--; flg = PETSC_TRUE;}
838:     else flg = PETSC_FALSE;
839:     /* truncate S */
840:     if (k+l+deg<=nq) {
841:       if (!falselock && ctx->lock) {
842:         newc = k-pep->nconv;
843:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,locked,newc,flg,work+nwu,rwork+nrwu);
844:         locked += newc;
845:       } else {
846:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,0,0,flg,work+nwu,rwork+nrwu);
847:       }
848:     }
849:     pep->nconv = k;
850:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
851:   }
852:   if (pep->nconv>0) {
853:     /* {V*S_nconv^i}_{i=0}^{d-1} has rank nconv instead of nconv+d-1. Force zeros in each S_nconv^i block */
854:     nq = pep->nconv;

856:     /* perform Newton refinement if required */
857:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
858:       /* extract invariant pair */
859:       DSGetArray(pep->ds,DS_MAT_A,&H);
860:       PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds,work+nwu);
861:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
862:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
863:       DSSetState(pep->ds,DS_STATE_RAW);
864:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds);
865:       DSSolve(pep->ds,pep->eigr,pep->eigi);
866:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
867:       DSSynchronize(pep->ds,pep->eigr,pep->eigi);
868:       DSGetArray(pep->ds,DS_MAT_Q,&Q);
869:       PEPTOARSupdate(S,ld,deg,nq,0,pep->nconv,pep->nconv,Q,ldds,work+nwu);
870:       DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
871:     } else {
872:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
873:       DSSetState(pep->ds,DS_STATE_RAW);
874:     }
875:   }
876:   STGetTransform(pep->st,&flg);
877:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
878:     if (!flg && pep->ops->backtransform) {
879:         (*pep->ops->backtransform)(pep);
880:     }
881:     if (pep->sfactor!=1.0) {
882:       for (j=0;j<pep->nconv;j++) {
883:         pep->eigr[j] *= pep->sfactor;
884:         pep->eigi[j] *= pep->sfactor;
885:       }
886:       /* restore original values */
887:       for (i=0;i<pep->nmat;i++){
888:         pep->pbc[pep->nmat+i] *= pep->sfactor;
889:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
890:       }
891:     }
892:   }
893:   /* restore original values */
894:   if (!flg) {
895:     pep->target *= pep->sfactor;
896:     STScaleShift(pep->st,pep->sfactor);
897:   } else {
898:     STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
899:     pep->target = (sinv)?pep->target/pep->sfactor:pep->target*pep->sfactor;
900:   }
901:   if (pep->sfactor!=1.0) { RGPopScale(pep->rg); }

903:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
904:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
905:   DSSetState(pep->ds,DS_STATE_RAW);

907:   PetscFree2(work,rwork);
908:   return(0);
909: }

911: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
912: {
913:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

916:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
917:   else {
918:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
919:     ctx->keep = keep;
920:   }
921:   return(0);
922: }

924: /*@
925:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
926:    method, in particular the proportion of basis vectors that must be kept
927:    after restart.

929:    Logically Collective on PEP

931:    Input Parameters:
932: +  pep  - the eigenproblem solver context
933: -  keep - the number of vectors to be kept at restart

935:    Options Database Key:
936: .  -pep_toar_restart - Sets the restart parameter

938:    Notes:
939:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

941:    Level: advanced

943: .seealso: PEPTOARGetRestart()
944: @*/
945: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
946: {

952:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
953:   return(0);
954: }

956: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
957: {
958:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

961:   *keep = ctx->keep;
962:   return(0);
963: }

965: /*@
966:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

968:    Not Collective

970:    Input Parameter:
971: .  pep - the eigenproblem solver context

973:    Output Parameter:
974: .  keep - the restart parameter

976:    Level: advanced

978: .seealso: PEPTOARSetRestart()
979: @*/
980: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
981: {

987:   PetscUseMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
988:   return(0);
989: }

991: static PetscErrorCode PEPTOARSetLocking_TOAR(PEP pep,PetscBool lock)
992: {
993:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

996:   ctx->lock = lock;
997:   return(0);
998: }

1000: /*@
1001:    PEPTOARSetLocking - Choose between locking and non-locking variants of
1002:    the TOAR method.

1004:    Logically Collective on PEP

1006:    Input Parameters:
1007: +  pep  - the eigenproblem solver context
1008: -  lock - true if the locking variant must be selected

1010:    Options Database Key:
1011: .  -pep_toar_locking - Sets the locking flag

1013:    Notes:
1014:    The default is to lock converged eigenpairs when the method restarts.
1015:    This behaviour can be changed so that all directions are kept in the
1016:    working subspace even if already converged to working accuracy (the
1017:    non-locking variant).

1019:    Level: advanced

1021: .seealso: PEPTOARGetLocking()
1022: @*/
1023: PetscErrorCode PEPTOARSetLocking(PEP pep,PetscBool lock)
1024: {

1030:   PetscTryMethod(pep,"PEPTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
1031:   return(0);
1032: }

1034: static PetscErrorCode PEPTOARGetLocking_TOAR(PEP pep,PetscBool *lock)
1035: {
1036:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1039:   *lock = ctx->lock;
1040:   return(0);
1041: }

1043: /*@
1044:    PEPTOARGetLocking - Gets the locking flag used in the TOAR method.

1046:    Not Collective

1048:    Input Parameter:
1049: .  pep - the eigenproblem solver context

1051:    Output Parameter:
1052: .  lock - the locking flag

1054:    Level: advanced

1056: .seealso: PEPTOARSetLocking()
1057: @*/
1058: PetscErrorCode PEPTOARGetLocking(PEP pep,PetscBool *lock)
1059: {

1065:   PetscUseMethod(pep,"PEPTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
1066:   return(0);
1067: }

1069: PetscErrorCode PEPSetFromOptions_TOAR(PetscOptionItems *PetscOptionsObject,PEP pep)
1070: {
1072:   PetscBool      flg,lock;
1073:   PetscReal      keep;

1076:   PetscOptionsHead(PetscOptionsObject,"PEP TOAR Options");

1078:     PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
1079:     if (flg) { PEPTOARSetRestart(pep,keep); }

1081:     PetscOptionsBool("-pep_toar_locking","Choose between locking and non-locking variants","PEPTOARSetLocking",PETSC_FALSE,&lock,&flg);
1082:     if (flg) { PEPTOARSetLocking(pep,lock); }

1084:   PetscOptionsTail();
1085:   return(0);
1086: }

1088: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
1089: {
1091:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
1092:   PetscBool      isascii;

1095:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1096:   if (isascii) {
1097:     PetscViewerASCIIPrintf(viewer,"  %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
1098:     PetscViewerASCIIPrintf(viewer,"  using the %slocking variant\n",ctx->lock?"":"non-");
1099:   }
1100:   return(0);
1101: }

1103: PetscErrorCode PEPDestroy_TOAR(PEP pep)
1104: {
1106:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;

1109:   PetscFree(ctx->S);
1110:   PetscFree(pep->data);
1111:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
1112:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
1113:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",NULL);
1114:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",NULL);
1115:   return(0);
1116: }

1118: PETSC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
1119: {
1120:   PEP_TOAR       *ctx;

1124:   PetscNewLog(pep,&ctx);
1125:   pep->data = (void*)ctx;
1126:   ctx->lock = PETSC_TRUE;

1128:   pep->ops->solve          = PEPSolve_TOAR;
1129:   pep->ops->setup          = PEPSetUp_TOAR;
1130:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
1131:   pep->ops->destroy        = PEPDestroy_TOAR;
1132:   pep->ops->view           = PEPView_TOAR;
1133:   pep->ops->backtransform  = PEPBackTransform_Default;
1134:   pep->ops->computevectors = PEPComputeVectors_Default;
1135:   pep->ops->extractvectors = PEPExtractVectors_TOAR;

1137:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
1138:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
1139:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",PEPTOARSetLocking_TOAR);
1140:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",PEPTOARGetLocking_TOAR);
1141:   return(0);
1142: }