Actual source code: ptoar.c

slepc-3.7.1 2016-05-27
Report Typos and Errors
  1: /*

  3:    SLEPc polynomial eigensolver: "toar"

  5:    Method: TOAR

  7:    Algorithm:

  9:        Two-Level Orthogonal Arnoldi.

 11:    References:

 13:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 14:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 16:        [2] C. Campos and J.E. Roman, "Parallel Krylov solvers for the
 17:            polynomial eigenvalue problem in SLEPc", SIAM J. Sci. Comput.
 18:            to appear, 2016.

 20:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 21:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 22:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

 24:    This file is part of SLEPc.

 26:    SLEPc is free software: you can redistribute it and/or modify it under  the
 27:    terms of version 3 of the GNU Lesser General Public License as published by
 28:    the Free Software Foundation.

 30:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 31:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 32:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 33:    more details.

 35:    You  should have received a copy of the GNU Lesser General  Public  License
 36:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 37:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 38: */

 40: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 41:  #include ../src/pep/impls/krylov/pepkrylov.h
 42: #include <slepcblaslapack.h>

 44: static PetscBool  cited = PETSC_FALSE;
 45: static const char citation[] =
 46:   "@Article{slepc-pep,\n"
 47:   "   author = \"C. Campos and J. E. Roman\",\n"
 48:   "   title = \"Parallel {Krylov} solvers for the polynomial eigenvalue problem in {SLEPc}\",\n"
 49:   "   journal = \"{SIAM} J. Sci. Comput.\",\n"
 50:   "   volume = \"to appear\",\n"
 51:   "   number = \"\",\n"
 52:   "   pages = \"\",\n"
 53:   "   year = \"2016,\"\n"
 54:   "   doi = \"http://dx.doi.org/10.xxxx/yyyy\"\n"
 55:   "}\n";

 59: /*
 60:   Norm of [sp;sq]
 61: */
 62: static PetscErrorCode PEPTOARSNorm2(PetscInt n,PetscScalar *S,PetscReal *norm)
 63: {
 65:   PetscBLASInt   n_,one=1;

 68:   PetscBLASIntCast(n,&n_);
 69:   *norm = BLASnrm2_(&n_,S,&one);
 70:   return(0);
 71: }

 75: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 76: {
 78:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 79:   PetscBool      shift,sinv,flg,lindep;
 80:   PetscInt       i,lds,deg=pep->nmat-1,j;
 81:   PetscReal      norm;

 84:   pep->lineariz = PETSC_TRUE;
 85:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 86:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
 87:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv);
 88:   /* Set STSHIFT as the default ST */
 89:   if (!((PetscObject)pep->st)->type_name) {
 90:     STSetType(pep->st,STSHIFT);
 91:   }
 92:   PetscObjectTypeCompare((PetscObject)pep->st,STSHIFT,&shift);
 93:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 94:   if (!shift && !sinv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Only STSHIFT and STSINVERT spectral transformations can be used");
 95:   if (!pep->which) {
 96:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 97:     else pep->which = PEP_LARGEST_MAGNITUDE;
 98:   }
 99:   if (pep->problem_type!=PEP_GENERAL) {
100:     PetscInfo(pep,"Problem type ignored, performing a non-symmetric linearization\n");
101:   }

103:   if (!ctx->keep) ctx->keep = 0.5;

105:   PEPAllocateSolution(pep,pep->nmat-1);
106:   PEPSetWorkVecs(pep,3);
107:   DSSetType(pep->ds,DSNHEP);
108:   DSSetExtraRow(pep->ds,PETSC_TRUE);
109:   DSAllocate(pep->ds,pep->ncv+1);

111:   PEPBasisCoefficients(pep,pep->pbc);
112:   STGetTransform(pep->st,&flg);
113:   if (!flg) {
114:     PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
115:     if (sinv) {
116:       PEPEvaluateBasis(pep,pep->target,0,pep->solvematcoeffs,NULL);
117:     } else {
118:       for (i=0;i<pep->nmat-1;i++) pep->solvematcoeffs[i] = 0.0;
119:       pep->solvematcoeffs[pep->nmat-1] = 1.0;
120:     }
121:   }
122:   ctx->ld = pep->ncv+(pep->nmat-1);   /* number of rows of each fragment of S */
123:   lds = (pep->nmat-1)*ctx->ld;
124:   PetscCalloc1(lds*ctx->ld,&ctx->S);

126:   /* process starting vector */
127:   ctx->nq = 0;
128:   for (i=0;i<deg;i++) {
129:     if (pep->nini>-deg) {
130:       BVSetRandomColumn(pep->V,ctx->nq);
131:     } else {
132:       BVInsertVec(pep->V,ctx->nq,pep->IS[i]);
133:     }
134:     BVOrthogonalizeColumn(pep->V,ctx->nq,ctx->S+i*ctx->ld,&norm,&lindep);
135:     if (!lindep) {
136:       BVScaleColumn(pep->V,ctx->nq,1.0/norm);
137:       ctx->S[ctx->nq+i*ctx->ld] = norm;
138:       ctx->nq++;
139:     }
140:   }
141:   if (ctx->nq==0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"PEP: Problem with initial vector");
142:   PEPTOARSNorm2(lds,ctx->S,&norm);
143:   for (j=0;j<deg;j++) {
144:     for (i=0;i<=j;i++) ctx->S[i+j*ctx->ld] /= norm;
145:   }
146:   if (pep->nini<0) {
147:     SlepcBasisDestroy_Private(&pep->nini,&pep->IS);
148:   }
149:   return(0);
150: }

154: /*
155:  Computes GS orthogonalization   [z;x] - [Sp;Sq]*y,
156:  where y = ([Sp;Sq]'*[z;x]).
157:    k: Column from S to be orthogonalized against previous columns.
158:    Sq = Sp+ld
159:    dim(work)>=k
160: */
161: static PetscErrorCode PEPTOAROrth2(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt k,PetscScalar *y,PetscReal *norm,PetscBool *lindep,PetscScalar *work)
162: {
164:   PetscBLASInt   n_,lds_,k_,one=1;
165:   PetscScalar    sonem=-1.0,sone=1.0,szero=0.0,*x0,*x,*c;
166:   PetscInt       i,lds=deg*ld,n;
167:   PetscReal      eta,onorm;

170:   BVGetOrthogonalization(pep->V,NULL,NULL,&eta,NULL);
171:   n = k+deg-1;
172:   PetscBLASIntCast(n,&n_);
173:   PetscBLASIntCast(deg*ld,&lds_);
174:   PetscBLASIntCast(k,&k_); /* number of vectors to orthogonalize against them */
175:   c = work;
176:   x0 = S+k*lds;
177:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,y,&one));
178:   for (i=1;i<deg;i++) {
179:     x = S+i*ld+k*lds;
180:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,y,&one));
181:   }
182:   for (i=0;i<deg;i++) {
183:     x= S+i*ld+k*lds;
184:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,y,&one,&sone,x,&one));
185:   }
186:   PEPTOARSNorm2(lds,S+k*lds,&onorm);
187:   /* twice */
188:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S,&lds_,x0,&one,&szero,c,&one));
189:   for (i=1;i<deg;i++) {
190:     x = S+i*ld+k*lds;
191:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n_,&k_,&sone,S+i*ld,&lds_,x,&one,&sone,c,&one));
192:   }
193:   for (i=0;i<deg;i++) {
194:     x= S+i*ld+k*lds;
195:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&k_,&sonem,S+i*ld,&lds_,c,&one,&sone,x,&one));
196:   }
197:   for (i=0;i<k;i++) y[i] += c[i];
198:   if (norm) {
199:     PEPTOARSNorm2(lds,S+k*lds,norm);
200:     if (lindep) *lindep = (*norm < eta * onorm)?PETSC_TRUE:PETSC_FALSE;
201:   }
202:   return(0);
203: }

207: /*
208:   Extend the TOAR basis by applying the the matrix operator
209:   over a vector which is decomposed in the TOAR way
210:   Input:
211:     - pbc: array containing the polynomial basis coefficients
212:     - S,V: define the latest Arnoldi vector (nv vectors in V)
213:   Output:
214:     - t: new vector extending the TOAR basis
215:     - r: temporary coefficients to compute the TOAR coefficients
216:          for the new Arnoldi vector
217:   Workspace: t_ (two vectors)
218: */
219: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_)
220: {
222:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
223:   Vec            v=t_[0],ve=t_[1],q=t_[2];
224:   PetscScalar    alpha=1.0,*ss,a;
225:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
226:   PetscBool      flg;

229:   BVSetActiveColumns(pep->V,0,nv);
230:   STGetTransform(pep->st,&flg);
231:   if (sinvert) {
232:     for (j=0;j<nv;j++) {
233:       if (deg>1) r[lr+j] = S[j]/ca[0];
234:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
235:     }
236:     for (k=2;k<deg-1;k++) {
237:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
238:     }
239:     k = deg-1;
240:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
241:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
242:   } else {
243:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
244:   }
245:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
246:   if (pep->Dr) { /* balancing */
247:     VecPointwiseMult(v,v,pep->Dr);
248:   }
249:   STMatMult(pep->st,off,v,q);
250:   VecScale(q,a);
251:   for (j=1+off;j<deg+off-1;j++) {
252:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
253:     if (pep->Dr) {
254:       VecPointwiseMult(v,v,pep->Dr);
255:     }
256:     STMatMult(pep->st,j,v,t);
257:     a *= pep->sfactor;
258:     VecAXPY(q,a,t);
259:   }
260:   if (sinvert) {
261:     BVMultVec(V,1.0,0.0,v,ss);
262:     if (pep->Dr) {
263:       VecPointwiseMult(v,v,pep->Dr);
264:     }
265:     STMatMult(pep->st,deg,v,t);
266:     a *= pep->sfactor;
267:     VecAXPY(q,a,t);
268:   } else {
269:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
270:     if (pep->Dr) {
271:       VecPointwiseMult(ve,ve,pep->Dr);
272:     }
273:     a *= pep->sfactor;
274:     STMatMult(pep->st,deg-1,ve,t);
275:     VecAXPY(q,a,t);
276:     a *= pep->sfactor;
277:   }
278:   if (flg || !sinvert) alpha /= a;
279:   STMatSolve(pep->st,q,t);
280:   VecScale(t,alpha);
281:   if (!sinvert) {
282:     if (cg[deg-1]!=0) { VecAXPY(t,cg[deg-1],v); }
283:     if (cb[deg-1]!=0) { VecAXPY(t,cb[deg-1],ve); }
284:   }
285:   if (pep->Dr) {
286:     VecPointwiseDivide(t,t,pep->Dr);
287:   }
288:   return(0);
289: }

293: /*
294:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
295: */
296: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
297: {
298:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
299:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
300:   PetscScalar t=1.0,tp=0.0,tt;

303:   if (sinvert) {
304:     for (k=1;k<d;k++) {
305:       tt = t;
306:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
307:       tp = tt;
308:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
309:     }
310:   } else {
311:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
312:     for (k=1;k<d-1;k++) {
313:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
314:     }
315:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
316:   }
317:   return(0);
318: }

322: /*
323:   Compute a run of Arnoldi iterations dim(work)=ld
324: */
325: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscInt *nq,PetscScalar *S,PetscInt ld,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscScalar *work,Vec *t_)
326: {
328:   PetscInt       i,j,p,m=*M,nwu=0,deg=pep->nmat-1;
329:   PetscInt       lds=ld*deg,nqt=*nq;
330:   Vec            t;
331:   PetscReal      norm;
332:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
333:   PetscScalar    *x;

336:   STGetTransform(pep->st,&flg);
337:   if (!flg) {
338:     /* spectral transformation handled by the solver */
339:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
340:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"STtype not supported fr TOAR without transforming matrices");
341:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
342:   }
343:   for (j=k;j<m;j++) {
344:     /* apply operator */
345:     BVGetColumn(pep->V,nqt,&t);
346:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_);
347:     BVRestoreColumn(pep->V,nqt,&t);

349:     /* orthogonalize */
350:     if (sinvert) x = S+(j+1)*lds;
351:     else x = S+(deg-1)*ld+(j+1)*lds;
352:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
353:     if (!lindep) {
354:       x[nqt] = norm;
355:       BVScaleColumn(pep->V,nqt,1.0/norm);
356:       nqt++;
357:     }

359:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);
360:     /* level-2 orthogonalization */
361:     PEPTOAROrth2(pep,S,ld,deg,j+1,H+j*ldh,&norm,breakdown,work+nwu);
362:     H[j+1+ldh*j] = norm;
363:     *nq = nqt;
364:     if (*breakdown) {
365:       *M = j+1;
366:       break;
367:     }
368:     for (p=0;p<deg;p++) {
369:       for (i=0;i<=j+deg;i++) {
370:         S[i+p*ld+(j+1)*lds] /= norm;
371:       }
372:     }
373:   }
374:   return(0);
375: }

379: /*
380:   dim(rwork)=6*n; dim(work)=6*ld*lds+2*cs1
381: */
382: static PetscErrorCode PEPTOARTrunc(PEP pep,PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt *rs1a,PetscInt cs1,PetscInt lock,PetscInt newc,PetscBool final,PetscScalar *work,PetscReal *rwork)
383: {
384: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
386:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD/GEQRF/ORGQR - Lapack routine is unavailable");
387: #else
389:   PetscInt       nwu=0,nrwu=0,nnc,nrow,lwa;
390:   PetscInt       j,i,k,n,lds=deg*ld,rs1=*rs1a,rk=0,offu;
391:   PetscScalar    *M,*V,*pU,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau;
392:   PetscReal      *sg,tol;
393:   PetscBLASInt   cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
394:   Mat            U;

397:   if (cs1==0) return(0);
398:   lwa = 6*ld*lds+2*cs1;
399:   n = (rs1>deg*cs1)?deg*cs1:rs1;
400:   nnc = cs1-lock-newc;
401:   nrow = rs1-lock;
402:   PetscMalloc4(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pU,deg*rs1,&tau);
403:   offu = lock*(rs1+1);
404:   M = work+nwu;
405:   nwu += rs1*cs1*deg;
406:   sg = rwork+nrwu;
407:   nrwu += n;
408:   PetscMemzero(pU,rs1*n*sizeof(PetscScalar));
409:   V = work+nwu;
410:   nwu += deg*cs1*n;
411:   PetscBLASIntCast(n,&n_);
412:   PetscBLASIntCast(nnc,&nnc_);
413:   PetscBLASIntCast(cs1,&cs1_);
414:   PetscBLASIntCast(rs1,&rs1_);
415:   PetscBLASIntCast(newc,&newc_);
416:   PetscBLASIntCast(newc*deg,&newctdeg);
417:   PetscBLASIntCast(nnc*deg,&nnctdeg);
418:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
419:   PetscBLASIntCast(lwa-nwu,&lw_);
420:   PetscBLASIntCast(nrow,&nrow_);
421:   PetscBLASIntCast(lds,&lds_);
422:   if (newc>0) {
423:   /* truncate columns associated with new converged eigenpairs */
424:     for (j=0;j<deg;j++) {
425:       for (i=lock;i<lock+newc;i++) {
426:         PetscMemcpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
427:       }
428:     }
429: #if !defined (PETSC_USE_COMPLEX)
430:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,&info));
431: #else
432:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pU+offu,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
433: #endif
434:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
435:     /* SVD has rank min(newc,nrow) */
436:     rk = PetscMin(newc,nrow);
437:     for (i=0;i<rk;i++) {
438:       t = sg[i];
439:       PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,V+i,&n_));
440:     }
441:     for (i=0;i<deg;i++) {
442:       for (j=lock;j<lock+newc;j++) {
443:         PetscMemcpy(S+j*lds+i*ld+lock,V+(newc*i+j-lock)*n,rk*sizeof(PetscScalar));
444:         PetscMemzero(S+j*lds+i*ld+lock+rk,(ld-lock-rk)*sizeof(PetscScalar));
445:       }
446:     }
447:     /*
448:       update columns associated with non-converged vectors, orthogonalize
449:        against pU so that next M has rank nnc+d-1 insted of nrow+d-1
450:     */
451:     for (i=0;i<deg;i++) {
452:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
453:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
454:       /* repeat orthogonalization step */
455:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pU+offu,&rs1_,S+(lock+newc)*lds+i*ld+lock,&lds_,&zero,SS2,&newc_));
456:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pU+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ld+lock,&lds_));
457:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
458:     }
459:   }
460:   /* truncate columns associated with non-converged eigenpairs */
461:   for (j=0;j<deg;j++) {
462:     for (i=lock+newc;i<cs1;i++) {
463:       PetscMemcpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ld+lock,nrow*sizeof(PetscScalar));
464:     }
465:   }
466: #if !defined (PETSC_USE_COMPLEX)
467:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,&info));
468: #else
469:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pU+offu+newc*rs1,&rs1_,V,&n_,work+nwu,&lw_,rwork+nrwu,&info));
470: #endif
471:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
472:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
473:   for (i=0;i<PetscMin(n_,nnctdeg);i++) if (sg[i]>tol) rk++;
474:   rk = PetscMin(nnc+deg-1,rk);
475:   /* the SVD has rank (atmost) nnc+deg-1 */
476:   for (i=0;i<rk;i++) {
477:     t = sg[i];
478:     PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,V+i,&n_));
479:   }
480:   /* update S */
481:   PetscMemzero(S+cs1*lds,(ld-cs1)*lds*sizeof(PetscScalar));
482:   k = ld-lock-newc-rk;
483:   for (i=0;i<deg;i++) {
484:     for (j=lock+newc;j<cs1;j++) {
485:       PetscMemcpy(S+j*lds+i*ld+lock+newc,V+(nnc*i+j-lock-newc)*n,rk*sizeof(PetscScalar));
486:       PetscMemzero(S+j*lds+i*ld+lock+newc+rk,k*sizeof(PetscScalar));
487:     }
488:   }
489:   if (newc>0) {
490:     for (i=0;i<deg;i++) {
491:       p = SS+nnc*newc*i;
492:       for (j=lock+newc;j<cs1;j++) {
493:         for (k=0;k<newc;k++) S[j*lds+i*ld+lock+k] = *(p++);
494:       }
495:     }
496:   }

498:   /* orthogonalize pU */
499:   rk = rk+newc;
500:   PetscBLASIntCast(rk,&rk_);
501:   PetscBLASIntCast(cs1-lock,&nnc_);
502:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));
503:   for (i=0;i<deg;i++) {
504:     PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pU+offu,&rs1_,S+lock*lds+lock+i*ld,&lds_));
505:   }
506:   PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&nrow_,&rk_,&rk_,pU+offu,&rs1_,tau,work+nwu,&lw_,&info));

508:   /* update vectors V(:,idx) = V*Q(:,idx) */
509:   rk = rk+lock;
510:   for (i=0;i<lock;i++) pU[(i+1)*rs1] = 1.0;
511:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pU,&U);
512:   BVSetActiveColumns(pep->V,lock,rs1);
513:   BVMultInPlace(pep->V,U,lock,rk);
514:   BVSetActiveColumns(pep->V,0,rk);
515:   MatDestroy(&U);
516:   *rs1a = rk;

518:   /* free work space */
519:   PetscFree4(SS,SS2,pU,tau);
520:   return(0);
521: #endif
522: }

526: /*
527:   S <- S*Q
528:   columns s-s+ncu of S
529:   rows 0-sr of S
530:   size(Q) qr x ncu
531:   dim(work)=sr*ncu
532: */
533: static PetscErrorCode PEPTOARSupdate(PetscScalar *S,PetscInt ld,PetscInt deg,PetscInt sr,PetscInt s,PetscInt ncu,PetscInt qr,PetscScalar *Q,PetscInt ldq,PetscScalar *work)
534: {
536:   PetscScalar    a=1.0,b=0.0;
537:   PetscBLASInt   sr_,ncu_,ldq_,lds_,qr_;
538:   PetscInt       j,lds=deg*ld,i;

541:   PetscBLASIntCast(sr,&sr_);
542:   PetscBLASIntCast(qr,&qr_);
543:   PetscBLASIntCast(ncu,&ncu_);
544:   PetscBLASIntCast(lds,&lds_);
545:   PetscBLASIntCast(ldq,&ldq_);
546:   for (i=0;i<deg;i++) {
547:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&ncu_,&qr_,&a,S+i*ld,&lds_,Q,&ldq_,&b,work,&sr_));
548:     for (j=0;j<ncu;j++) {
549:       PetscMemcpy(S+lds*(s+j)+i*ld,work+j*sr,sr*sizeof(PetscScalar));
550:     }
551:   }
552:   return(0);
553: }

557: /*
558:   Computes T_j = phi_idx(T). In T_j and T_p are phi_{idx-1}(T)
559:    and phi_{idx-2}(T) respectively or null if idx=0,1.
560:    Tp and Tj are input/output arguments
561: */
562: static PetscErrorCode PEPEvaluateBasisM(PEP pep,PetscInt k,PetscScalar *T,PetscInt ldt,PetscInt idx,PetscScalar **Tp,PetscScalar **Tj)
563: {
565:   PetscInt       i;
566:   PetscReal      *ca,*cb,*cg;
567:   PetscScalar    *pt,g,a;
568:   PetscBLASInt   k_,ldt_;

571:   if (idx==0) {
572:     PetscMemzero(*Tj,k*k*sizeof(PetscScalar));
573:     PetscMemzero(*Tp,k*k*sizeof(PetscScalar));
574:     for (i=0;i<k;i++) (*Tj)[i+i*k] = 1.0;
575:   } else {
576:     PetscBLASIntCast(ldt,&ldt_);
577:     PetscBLASIntCast(k,&k_);
578:     ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
579:     for (i=0;i<k;i++) T[i*ldt+i] -= cb[idx-1];
580:     a = 1/ca[idx-1];
581:     g = (idx==1)?0.0:-cg[idx-1]/ca[idx-1];
582:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,*Tj,&k_,&g,*Tp,&k_));
583:     pt = *Tj; *Tj = *Tp; *Tp = pt;
584:     for (i=0;i<k;i++) T[i*ldt+i] += cb[idx-1];
585:   }
586:   return(0);
587: }

591: /* dim(work)=6*sr*k;*/
592: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh,PetscScalar *work)
593: {
594: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(PETSC_MISSING_LAPACK_GETRF)
596:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/GETRI/GETRF - Lapack routine is unavailable");
597: #else
599:   PetscInt       nw,i,j,jj,nwu=0,lds,ldt,d=pep->nmat-1,idxcpy=0;
600:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,sone=1.0,g,a,*pM;
601:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
602:   PetscBool      transf=PETSC_FALSE,flg;
603:   PetscReal      nrm,norm,maxnrm,*rwork;
604:   BV             *R,Y;
605:   Mat            M,*A;
606:   Vec            v;

609:   if (k==0) return(0);
610:   nw = 6*sr*k;
611:   lds = deg*ld;
612:   At = work+nwu;
613:   nwu += sr*k;
614:   Bt = work+nwu;
615:   nwu += k*k;
616:   PetscMemzero(Bt,k*k*sizeof(PetscScalar));
617:   Hj = work+nwu;
618:   nwu += k*k;
619:   Hp = work+nwu;
620:   nwu += k*k;
621:   PetscMemzero(Hp,k*k*sizeof(PetscScalar));
622:   PetscMalloc1(k,&p);
623:   PetscBLASIntCast(sr,&sr_);
624:   PetscBLASIntCast(k,&k_);
625:   PetscBLASIntCast(lds,&lds_);
626:   PetscBLASIntCast(ldh,&ldh_);
627:   STGetTransform(pep->st,&flg);
628:   if (!flg) {
629:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
630:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
631:   }
632:   if (transf) {
633:     ldt = k;
634:     T = work+nwu;
635:     nwu += k*k;
636:     for (i=0;i<k;i++) {
637:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
638:     }
639:     if (flg) {
640:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
641:       PetscBLASIntCast(nw-nwu,&lwork);
642:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work+nwu,&lwork,&info));
643:     }
644:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
645:   } else {
646:     T = H; ldt = ldh;
647:   }
648:   PetscBLASIntCast(ldt,&ldt_);
649:   switch (pep->extract) {
650:   case PEP_EXTRACT_NONE:
651:     break;
652:   case PEP_EXTRACT_NORM:
653:     if (pep->basis == PEP_BASIS_MONOMIAL) {
654:       PetscBLASIntCast(ldt,&ldt_);
655:       PetscMalloc1(k,&rwork);
656:       norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
657:       PetscFree(rwork);
658:       if (norm>1.0) idxcpy = d-1;
659:     } else {
660:       PetscBLASIntCast(ldt,&ldt_);
661:       PetscMalloc1(k,&rwork);
662:       maxnrm = 0.0;
663:       for (i=0;i<pep->nmat-1;i++) {
664:         PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
665:         norm = LAPACKlange_("F",&k_,&k_,Hj,&k_,rwork);
666:         if (norm > maxnrm) {
667:           idxcpy = i;
668:           maxnrm = norm;
669:         }
670:       }
671:       PetscFree(rwork);
672:     }
673:     if (idxcpy>0) {
674:       /* copy block idxcpy of S to the first one */
675:       for (j=0;j<k;j++) {
676:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
677:       }
678:     }
679:     break;
680:   case PEP_EXTRACT_RESIDUAL:
681:     STGetTransform(pep->st,&flg);
682:     if (flg) {
683:       PetscMalloc1(pep->nmat,&A);
684:       for (i=0;i<pep->nmat;i++) {
685:         STGetTOperators(pep->st,i,A+i);
686:       }
687:     } else A = pep->A;
688:     PetscMalloc1(pep->nmat-1,&R);
689:     for (i=0;i<pep->nmat-1;i++) {
690:       BVDuplicateResize(pep->V,k,R+i);
691:     }
692:     BVDuplicateResize(pep->V,sr,&Y);
693:     MatCreateSeqDense(PETSC_COMM_SELF,sr,k,NULL,&M);
694:     g = 0.0; a = 1.0;
695:     BVSetActiveColumns(pep->V,0,sr);
696:     for (j=0;j<pep->nmat;j++) {
697:       BVMatMult(pep->V,A[j],Y);
698:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
699:       for (i=0;i<pep->nmat-1;i++) {
700:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&k_,&k_,&a,S+i*ld,&lds_,Hj,&k_,&g,At,&sr_));
701:         MatDenseGetArray(M,&pM);
702:         for (jj=0;jj<k;jj++) {
703:           PetscMemcpy(pM+jj*sr,At+jj*sr,sr*sizeof(PetscScalar));
704:         }
705:         MatDenseRestoreArray(M,&pM);
706:         BVMult(R[i],1.0,(i==0)?0.0:1.0,Y,M);
707:       }
708:     }

710:     /* frobenius norm */
711:     maxnrm = 0.0;
712:     for (i=0;i<pep->nmat-1;i++) {
713:       norm = 0.0;
714:       for (j=0;j<k;j++) {
715:         BVGetColumn(R[i],j,&v);
716:         VecNorm(v,NORM_2,&nrm);
717:         BVRestoreColumn(R[i],j,&v);
718:         norm += nrm*nrm;
719:       }
720:       norm = PetscSqrtReal(norm);
721:       if (maxnrm > norm) {
722:         maxnrm = norm;
723:         idxcpy = i;
724:       }
725:     }
726:     if (idxcpy>0) {
727:       /* copy block idxcpy of S to the first one */
728:       for (j=0;j<k;j++) {
729:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
730:       }
731:     }
732:     if (flg) PetscFree(A);
733:     for (i=0;i<pep->nmat-1;i++) {
734:       BVDestroy(&R[i]);
735:     }
736:     PetscFree(R);
737:     BVDestroy(&Y);
738:     MatDestroy(&M);
739:     break;
740:   case PEP_EXTRACT_STRUCTURED:
741:     for (j=0;j<k;j++) Bt[j+j*k] = 1.0;
742:     for (j=0;j<sr;j++) {
743:       for (i=0;i<k;i++) At[j*k+i] = PetscConj(S[i*lds+j]);
744:     }
745:     PEPEvaluateBasisM(pep,k,T,ldt,0,&Hp,&Hj);
746:     for (i=1;i<deg;i++) {
747:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
748:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
749:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
750:     }
751:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
752:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
753:     for (j=0;j<sr;j++) {
754:       for (i=0;i<k;i++) S[i*lds+j] = PetscConj(At[j*k+i]);
755:     }
756:     break;
757:   default:
758:     SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Extraction not implemented in this solver");
759:   }
760:   PetscFree(p);
761:   return(0);
762: #endif
763: }

767: PetscErrorCode PEPSolve_TOAR(PEP pep)
768: {
770:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
771:   PetscInt       i,j,k,l,nv=0,ld,lds,off,ldds,newn,nq=ctx->nq,nconv=0,locked=0,newc;
772:   PetscInt       lwa,lrwa,nwu=0,nrwu=0,nmat=pep->nmat,deg=nmat-1;
773:   PetscScalar    *S,*Q,*work,*H,sigma;
774:   PetscReal      beta,*rwork;
775:   PetscBool      breakdown=PETSC_FALSE,flg,falselock=PETSC_FALSE,sinv=PETSC_FALSE;

778:   PetscCitationsRegister(citation,&cited);
779:   if (ctx->lock) {
780:     PetscOptionsGetBool(NULL,NULL,"-pep_toar_falselocking",&falselock,NULL);
781:   }
782:   ld = ctx->ld;
783:   S = ctx->S;
784:   lds = deg*ld;        /* leading dimension of S */
785:   lwa = (deg+6)*ld*lds;
786:   lrwa = 7*lds;
787:   PetscMalloc2(lwa,&work,lrwa,&rwork);
788:   DSGetLeadingDimension(pep->ds,&ldds);
789:   STGetShift(pep->st,&sigma);

791:   /* update polynomial basis coefficients */
792:   STGetTransform(pep->st,&flg);
793:   if (pep->sfactor!=1.0) {
794:     for (i=0;i<nmat;i++) {
795:       pep->pbc[nmat+i] /= pep->sfactor;
796:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor;
797:     }
798:     if (!flg) {
799:       pep->target /= pep->sfactor;
800:       RGPushScale(pep->rg,1.0/pep->sfactor);
801:       STScaleShift(pep->st,1.0/pep->sfactor);
802:       sigma /= pep->sfactor;
803:     } else {
804:       PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
805:       RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
806:       STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
807:     }
808:   }

810:   if (flg) sigma = 0.0;

812:   /* restart loop */
813:   l = 0;
814:   while (pep->reason == PEP_CONVERGED_ITERATING) {
815:     pep->its++;

817:     /* compute an nv-step Lanczos factorization */
818:     nv = PetscMax(PetscMin(nconv+pep->mpd,pep->ncv),nv);
819:     DSGetArray(pep->ds,DS_MAT_A,&H);
820:     PEPTOARrun(pep,sigma,&nq,S,ld,H,ldds,pep->nconv+l,&nv,&breakdown,work+nwu,pep->work);
821:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
822:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
823:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
824:     if (l==0) {
825:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
826:     } else {
827:       DSSetState(pep->ds,DS_STATE_RAW);
828:     }

830:     /* solve projected problem */
831:     DSSolve(pep->ds,pep->eigr,pep->eigi);
832:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
833:     DSUpdateExtraRow(pep->ds);

835:     /* check convergence */
836:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
837:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);

839:     /* update l */
840:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
841:     else {
842:       l = (nv==k)?0:PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
843:       if (!breakdown) {
844:         /* prepare the Rayleigh quotient for restart */
845:         DSTruncate(pep->ds,k+l);
846:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
847:         l = newn-k;
848:       }
849:     }
850:     nconv = k;
851:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

853:     /* update S */
854:     off = pep->nconv*ldds;
855:     DSGetArray(pep->ds,DS_MAT_Q,&Q);
856:     PEPTOARSupdate(S,ld,deg,nq,pep->nconv,k+l-pep->nconv,nv,Q+off,ldds,work+nwu);
857:     DSRestoreArray(pep->ds,DS_MAT_Q,&Q);

859:     /* copy last column of S */
860:     PetscMemcpy(S+lds*(k+l),S+lds*nv,lds*sizeof(PetscScalar));

862:     if (breakdown) {
863:       /* stop if breakdown */
864:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
865:       pep->reason = PEP_DIVERGED_BREAKDOWN;
866:     }
867:     if (pep->reason != PEP_CONVERGED_ITERATING) {l--; flg = PETSC_TRUE;}
868:     else flg = PETSC_FALSE;
869:     /* truncate S */
870:     if (k+l+deg<nq) {
871:       if (!falselock && ctx->lock) {
872:         newc = k-pep->nconv;
873:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,locked,newc,flg,work+nwu,rwork+nrwu);
874:         locked += newc;
875:       } else {
876:         PEPTOARTrunc(pep,S,ld,deg,&nq,k+l+1,0,0,flg,work+nwu,rwork+nrwu);
877:       }
878:     }
879:     pep->nconv = k;
880:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
881:   }
882:   if (pep->nconv>0) {
883:     /* {V*S_nconv^i}_{i=0}^{d-1} has rank nconv instead of nconv+d-1. Force zeros in each S_nconv^i block */
884:     nq = pep->nconv;

886:     /* perform Newton refinement if required */
887:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
888:       /* extract invariant pair */
889:       DSGetArray(pep->ds,DS_MAT_A,&H);
890:       PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds,work+nwu);
891:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
892:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
893:       DSSetState(pep->ds,DS_STATE_RAW);
894:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds,&nq);
895:       DSSolve(pep->ds,pep->eigr,pep->eigi);
896:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
897:       DSGetArray(pep->ds,DS_MAT_Q,&Q);
898:       PEPTOARSupdate(S,ld,deg,nq,0,pep->nconv,pep->nconv,Q,ldds,work+nwu);
899:       DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
900:     } else {
901:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
902:       DSSetState(pep->ds,DS_STATE_RAW);
903:     }
904:   }
905:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
906:     STGetTransform(pep->st,&flg);
907:     if (!flg) {
908:       if (pep->ops->backtransform) {
909:         (*pep->ops->backtransform)(pep);
910:       }
911:       /* restore original values */
912:       pep->target *= pep->sfactor;
913:       STScaleShift(pep->st,pep->sfactor);
914:     } else {
915:       STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
916:     }
917:     if (pep->sfactor!=1.0) {
918:       for (j=0;j<pep->nconv;j++) {
919:         pep->eigr[j] *= pep->sfactor;
920:         pep->eigi[j] *= pep->sfactor;
921:       }
922:       /* restore original values */
923:       for (i=0;i<pep->nmat;i++){
924:         pep->pbc[pep->nmat+i] *= pep->sfactor;
925:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
926:       }
927:     }
928:   }
929:   if (pep->sfactor!=1.0) { RGPopScale(pep->rg); }

931:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
932:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
933:   DSSetState(pep->ds,DS_STATE_RAW);

935:   PetscFree2(work,rwork);
936:   return(0);
937: }

941: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
942: {
943:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

946:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
947:   else {
948:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
949:     ctx->keep = keep;
950:   }
951:   return(0);
952: }

956: /*@
957:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
958:    method, in particular the proportion of basis vectors that must be kept
959:    after restart.

961:    Logically Collective on PEP

963:    Input Parameters:
964: +  pep  - the eigenproblem solver context
965: -  keep - the number of vectors to be kept at restart

967:    Options Database Key:
968: .  -pep_toar_restart - Sets the restart parameter

970:    Notes:
971:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

973:    Level: advanced

975: .seealso: PEPTOARGetRestart()
976: @*/
977: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
978: {

984:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
985:   return(0);
986: }

990: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
991: {
992:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

995:   *keep = ctx->keep;
996:   return(0);
997: }

1001: /*@
1002:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

1004:    Not Collective

1006:    Input Parameter:
1007: .  pep - the eigenproblem solver context

1009:    Output Parameter:
1010: .  keep - the restart parameter

1012:    Level: advanced

1014: .seealso: PEPTOARSetRestart()
1015: @*/
1016: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
1017: {

1023:   PetscUseMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
1024:   return(0);
1025: }

1029: static PetscErrorCode PEPTOARSetLocking_TOAR(PEP pep,PetscBool lock)
1030: {
1031:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1034:   ctx->lock = lock;
1035:   return(0);
1036: }

1040: /*@
1041:    PEPTOARSetLocking - Choose between locking and non-locking variants of
1042:    the TOAR method.

1044:    Logically Collective on PEP

1046:    Input Parameters:
1047: +  pep  - the eigenproblem solver context
1048: -  lock - true if the locking variant must be selected

1050:    Options Database Key:
1051: .  -pep_toar_locking - Sets the locking flag

1053:    Notes:
1054:    The default is to lock converged eigenpairs when the method restarts.
1055:    This behaviour can be changed so that all directions are kept in the
1056:    working subspace even if already converged to working accuracy (the
1057:    non-locking variant).

1059:    Level: advanced

1061: .seealso: PEPTOARGetLocking()
1062: @*/
1063: PetscErrorCode PEPTOARSetLocking(PEP pep,PetscBool lock)
1064: {

1070:   PetscTryMethod(pep,"PEPTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
1071:   return(0);
1072: }

1076: static PetscErrorCode PEPTOARGetLocking_TOAR(PEP pep,PetscBool *lock)
1077: {
1078:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

1081:   *lock = ctx->lock;
1082:   return(0);
1083: }

1087: /*@
1088:    PEPTOARGetLocking - Gets the locking flag used in the TOAR method.

1090:    Not Collective

1092:    Input Parameter:
1093: .  pep - the eigenproblem solver context

1095:    Output Parameter:
1096: .  lock - the locking flag

1098:    Level: advanced

1100: .seealso: PEPTOARSetLocking()
1101: @*/
1102: PetscErrorCode PEPTOARGetLocking(PEP pep,PetscBool *lock)
1103: {

1109:   PetscUseMethod(pep,"PEPTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
1110:   return(0);
1111: }

1115: PetscErrorCode PEPSetFromOptions_TOAR(PetscOptionItems *PetscOptionsObject,PEP pep)
1116: {
1118:   PetscBool      flg,lock;
1119:   PetscReal      keep;

1122:   PetscOptionsHead(PetscOptionsObject,"PEP TOAR Options");
1123:   PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
1124:   if (flg) {
1125:     PEPTOARSetRestart(pep,keep);
1126:   }
1127:   PetscOptionsBool("-pep_toar_locking","Choose between locking and non-locking variants","PEPTOARSetLocking",PETSC_FALSE,&lock,&flg);
1128:   if (flg) {
1129:     PEPTOARSetLocking(pep,lock);
1130:   }
1131:   PetscOptionsTail();
1132:   return(0);
1133: }

1137: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
1138: {
1140:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
1141:   PetscBool      isascii;

1144:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1145:   if (isascii) {
1146:     PetscViewerASCIIPrintf(viewer,"  TOAR: %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
1147:     PetscViewerASCIIPrintf(viewer,"  TOAR: using the %slocking variant\n",ctx->lock?"":"non-");
1148:   }
1149:   return(0);
1150: }

1154: PetscErrorCode PEPDestroy_TOAR(PEP pep)
1155: {

1159:   PetscFree(pep->data);
1160:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
1161:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
1162:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",NULL);
1163:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",NULL);
1164:   return(0);
1165: }

1169: PETSC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
1170: {
1171:   PEP_TOAR       *ctx;

1175:   PetscNewLog(pep,&ctx);
1176:   pep->data = (void*)ctx;
1177:   ctx->lock = PETSC_TRUE;

1179:   pep->ops->solve          = PEPSolve_TOAR;
1180:   pep->ops->setup          = PEPSetUp_TOAR;
1181:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
1182:   pep->ops->destroy        = PEPDestroy_TOAR;
1183:   pep->ops->view           = PEPView_TOAR;
1184:   pep->ops->backtransform  = PEPBackTransform_Default;
1185:   pep->ops->computevectors = PEPComputeVectors_Default;
1186:   pep->ops->extractvectors = PEPExtractVectors_TOAR;
1187:   pep->ops->reset          = PEPReset_TOAR;
1188:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
1189:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
1190:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",PEPTOARSetLocking_TOAR);
1191:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",PEPTOARGetLocking_TOAR);
1192:   return(0);
1193: }