Actual source code: epskrylov.c
slepc-3.13.0 2020-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Common subroutines for all Krylov-type solvers
12: */
14: #include <slepc/private/epsimpl.h>
15: #include <slepc/private/slepcimpl.h>
16: #include <slepcblaslapack.h>
18: /*
19: EPSDelayedArnoldi - This function is equivalent to BVMatArnoldi but
20: performs the computation in a different way. The main idea is that
21: reorthogonalization is delayed to the next Arnoldi step. This version is
22: more scalable but in some cases convergence may stagnate.
23: */
24: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
25: {
27: PetscInt i,j,m=*M;
28: Vec u,t;
29: PetscScalar shh[100],*lhh,dot,dot2;
30: PetscReal norm1=0.0,norm2=1.0;
31: Vec vj,vj1,vj2;
34: if (m<=100) lhh = shh;
35: else {
36: PetscMalloc1(m,&lhh);
37: }
38: BVCreateVec(eps->V,&u);
39: BVCreateVec(eps->V,&t);
41: BVSetActiveColumns(eps->V,0,m);
42: for (j=k;j<m;j++) {
43: BVGetColumn(eps->V,j,&vj);
44: BVGetColumn(eps->V,j+1,&vj1);
45: STApply(eps->st,vj,vj1);
46: BVRestoreColumn(eps->V,j,&vj);
47: BVRestoreColumn(eps->V,j+1,&vj1);
49: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
50: if (j>k) {
51: BVDotColumnBegin(eps->V,j,lhh);
52: BVGetColumn(eps->V,j,&vj);
53: VecDotBegin(vj,vj,&dot);
54: }
55: if (j>k+1) {
56: BVNormVecBegin(eps->V,u,NORM_2,&norm2);
57: BVGetColumn(eps->V,j-2,&vj2);
58: VecDotBegin(u,vj2,&dot2);
59: }
61: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
62: if (j>k) {
63: BVDotColumnEnd(eps->V,j,lhh);
64: VecDotEnd(vj,vj,&dot);
65: BVRestoreColumn(eps->V,j,&vj);
66: }
67: if (j>k+1) {
68: BVNormVecEnd(eps->V,u,NORM_2,&norm2);
69: VecDotEnd(u,vj2,&dot2);
70: BVRestoreColumn(eps->V,j-2,&vj2);
71: }
73: if (j>k) {
74: norm1 = PetscSqrtReal(PetscRealPart(dot));
75: for (i=0;i<j;i++)
76: H[ldh*j+i] = H[ldh*j+i]/norm1;
77: H[ldh*j+j] = H[ldh*j+j]/dot;
79: BVCopyVec(eps->V,j,t);
80: BVScaleColumn(eps->V,j,1.0/norm1);
81: BVScaleColumn(eps->V,j+1,1.0/norm1);
82: }
84: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
86: if (j>k) {
87: BVSetActiveColumns(eps->V,0,j);
88: BVMultVec(eps->V,-1.0,1.0,t,lhh);
89: BVSetActiveColumns(eps->V,0,m);
90: for (i=0;i<j;i++)
91: H[ldh*(j-1)+i] += lhh[i];
92: }
94: if (j>k+1) {
95: BVGetColumn(eps->V,j-1,&vj1);
96: VecCopy(u,vj1);
97: BVRestoreColumn(eps->V,j-1,&vj1);
98: BVScaleColumn(eps->V,j-1,1.0/norm2);
99: H[ldh*(j-2)+j-1] = norm2;
100: }
102: if (j<m-1) {
103: VecCopy(t,u);
104: }
105: }
107: BVNormVec(eps->V,t,NORM_2,&norm2);
108: VecScale(t,1.0/norm2);
109: BVGetColumn(eps->V,m-1,&vj1);
110: VecCopy(t,vj1);
111: BVRestoreColumn(eps->V,m-1,&vj1);
112: H[ldh*(m-2)+m-1] = norm2;
114: BVDotColumn(eps->V,m,lhh);
116: BVMultColumn(eps->V,-1.0,1.0,m,lhh);
117: for (i=0;i<m;i++)
118: H[ldh*(m-1)+i] += lhh[i];
120: BVNormColumn(eps->V,m,NORM_2,beta);
121: BVScaleColumn(eps->V,m,1.0 / *beta);
122: *breakdown = PETSC_FALSE;
124: if (m>100) { PetscFree(lhh); }
125: VecDestroy(&u);
126: VecDestroy(&t);
127: return(0);
128: }
130: /*
131: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
132: but without reorthogonalization (only delayed normalization).
133: */
134: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
135: {
137: PetscInt i,j,m=*M;
138: PetscScalar dot;
139: PetscReal norm=0.0;
140: Vec vj,vj1;
143: BVSetActiveColumns(eps->V,0,m);
144: for (j=k;j<m;j++) {
145: BVGetColumn(eps->V,j,&vj);
146: BVGetColumn(eps->V,j+1,&vj1);
147: STApply(eps->st,vj,vj1);
148: BVRestoreColumn(eps->V,j+1,&vj1);
149: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
150: if (j>k) {
151: VecDotBegin(vj,vj,&dot);
152: }
153: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
154: if (j>k) {
155: VecDotEnd(vj,vj,&dot);
156: }
157: BVRestoreColumn(eps->V,j,&vj);
159: if (j>k) {
160: norm = PetscSqrtReal(PetscRealPart(dot));
161: BVScaleColumn(eps->V,j,1.0/norm);
162: H[ldh*(j-1)+j] = norm;
164: for (i=0;i<j;i++)
165: H[ldh*j+i] = H[ldh*j+i]/norm;
166: H[ldh*j+j] = H[ldh*j+j]/dot;
167: BVScaleColumn(eps->V,j+1,1.0/norm);
168: *beta = norm;
169: }
170: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
171: }
173: *breakdown = PETSC_FALSE;
174: return(0);
175: }
177: /*
178: EPSKrylovConvergence_Filter - Specialized version for STFILTER.
179: */
180: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
181: {
183: PetscInt k,ninside,nconv;
184: PetscScalar re,im;
185: PetscReal resnorm;
188: ninside = 0; /* count how many eigenvalues are located in the interval */
189: for (k=kini;k<kini+nits;k++) {
190: if (PetscRealPart(eps->eigr[k]) < gamma) break;
191: ninside++;
192: }
193: eps->nev = ninside+kini; /* adjust eigenvalue count */
194: nconv = 0; /* count how many eigenvalues satisfy the convergence criterion */
195: for (k=kini;k<kini+ninside;k++) {
196: /* eigenvalue */
197: re = eps->eigr[k];
198: im = eps->eigi[k];
199: DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
200: resnorm *= beta;
201: /* error estimate */
202: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
203: if (eps->errest[k] < eps->tol) nconv++;
204: else break;
205: }
206: *kout = kini+nconv;
207: PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
208: return(0);
209: }
211: /*
212: EPSKrylovConvergence - Implements the loop that checks for convergence
213: in Krylov methods.
215: Input Parameters:
216: eps - the eigensolver; some error estimates are updated in eps->errest
217: getall - whether all residuals must be computed
218: kini - initial value of k (the loop variable)
219: nits - number of iterations of the loop
220: V - set of basis vectors (used only if trueresidual is activated)
221: nv - number of vectors to process (dimension of Q, columns of V)
222: beta - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
223: corrf - correction factor for residual estimates (only in harmonic KS)
225: Output Parameters:
226: kout - the first index where the convergence test failed
227: */
228: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal betat,PetscReal corrf,PetscInt *kout)
229: {
231: PetscInt k,newk,newk2,marker,ld,inside;
232: PetscScalar re,im,*Zr,*Zi,*X;
233: PetscReal resnorm,gamma,lerrest;
234: PetscBool isshift,isfilter,refined,istrivial;
235: Vec x=NULL,y=NULL,w[3];
238: if (eps->which == EPS_ALL) {
239: PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
240: if (isfilter) {
241: STFilterGetThreshold(eps->st,&gamma);
242: EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
243: return(0);
244: }
245: }
246: RGIsTrivial(eps->rg,&istrivial);
247: if (eps->trueres) {
248: BVCreateVec(eps->V,&x);
249: BVCreateVec(eps->V,&y);
250: BVCreateVec(eps->V,&w[0]);
251: BVCreateVec(eps->V,&w[2]);
252: #if !defined(PETSC_USE_COMPLEX)
253: BVCreateVec(eps->V,&w[1]);
254: #else
255: w[1] = NULL;
256: #endif
257: }
258: DSGetLeadingDimension(eps->ds,&ld);
259: DSGetRefined(eps->ds,&refined);
260: PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
261: marker = -1;
262: if (eps->trackall) getall = PETSC_TRUE;
263: for (k=kini;k<kini+nits;k++) {
264: /* eigenvalue */
265: re = eps->eigr[k];
266: im = eps->eigi[k];
267: if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
268: STBackTransform(eps->st,1,&re,&im);
269: }
270: if (!istrivial) {
271: RGCheckInside(eps->rg,1,&re,&im,&inside);
272: if (marker==-1 && inside<0) marker = k;
273: if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) { /* make sure eps->converged below uses the right value */
274: re = eps->eigr[k];
275: im = eps->eigi[k];
276: }
277: }
278: newk = k;
279: DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
280: if (eps->trueres) {
281: DSGetArray(eps->ds,DS_MAT_X,&X);
282: Zr = X+k*ld;
283: if (newk==k+1) Zi = X+newk*ld;
284: else Zi = NULL;
285: EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
286: DSRestoreArray(eps->ds,DS_MAT_X,&X);
287: EPSComputeResidualNorm_Private(eps,PETSC_FALSE,re,im,x,y,w,&resnorm);
288: }
289: else if (!refined) resnorm *= beta*corrf;
290: /* error estimate */
291: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
292: if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
293: if (eps->twosided) {
294: newk2 = k;
295: DSVectors(eps->dsts,DS_MAT_X,&newk2,&resnorm);
296: resnorm *= betat;
297: (*eps->converged)(eps,re,im,resnorm,&lerrest,eps->convergedctx);
298: eps->errest[k] = PetscMax(eps->errest[k],lerrest);
299: if (marker==-1 && lerrest >= eps->tol) marker = k;
300: }
301: if (newk==k+1) {
302: eps->errest[k+1] = eps->errest[k];
303: k++;
304: }
305: if (marker!=-1 && !getall) break;
306: }
307: if (marker!=-1) k = marker;
308: *kout = k;
309: if (eps->trueres) {
310: VecDestroy(&x);
311: VecDestroy(&y);
312: VecDestroy(&w[0]);
313: VecDestroy(&w[2]);
314: #if !defined(PETSC_USE_COMPLEX)
315: VecDestroy(&w[1]);
316: #endif
317: }
318: return(0);
319: }
321: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
322: {
324: PetscInt j,m = *M,i,ld,l;
325: Vec vj,vj1;
326: PetscScalar *hwork,lhwork[100];
327: PetscReal norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
328: PetscBLASInt j_,one=1;
331: DSGetLeadingDimension(eps->ds,&ld);
332: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
333: if (cos) *cos = 1.0;
334: if (m > 100) {
335: PetscMalloc1(m,&hwork);
336: } else hwork = lhwork;
338: BVSetActiveColumns(eps->V,0,m);
339: for (j=k;j<m;j++) {
340: BVGetColumn(eps->V,j,&vj);
341: BVGetColumn(eps->V,j+1,&vj1);
342: STApply(eps->st,vj,vj1);
343: BVRestoreColumn(eps->V,j,&vj);
344: BVRestoreColumn(eps->V,j+1,&vj1);
345: BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
346: alpha[j] = PetscRealPart(hwork[j]);
347: beta[j] = PetscAbsReal(norm);
348: DSGetArrayReal(eps->ds,DS_MAT_T,&f);
349: if (j==k) {
350: for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
351: for (i=0;i<l;i++) hwork[i] = 0.0;
352: }
353: DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
354: hwork[j-1] -= beta[j-1];
355: PetscBLASIntCast(j,&j_);
356: sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
357: fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
358: if (j>0) fro = SlepcAbs(fro,beta[j-1]);
359: if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
360: omega[j+1] = (norm<0.0)? -1.0: 1.0;
361: BVScaleColumn(eps->V,j+1,1.0/norm);
362: /* */
363: if (cos) {
364: BVGetColumn(eps->V,j+1,&vj1);
365: VecNorm(vj1,NORM_2,&norm1);
366: BVApplyMatrix(eps->V,vj1,w);
367: BVRestoreColumn(eps->V,j+1,&vj1);
368: VecNorm(w,NORM_2,&norm2);
369: t = 1.0/(norm1*norm2);
370: if (*cos>t) *cos = t;
371: }
372: }
373: if (m > 100) {
374: PetscFree(hwork);
375: }
376: return(0);
377: }