Actual source code: ks-harm.c
1: /*
3: SLEPc eigensolver: "krylovschur"
5: Method: Krylov-Schur
7: Algorithm:
9: Single-vector Krylov-Schur method for both symmetric and non-symmetric
10: problems.
12: References:
14: [1] "Krylov-Schur Methods in SLEPc", SLEPc Technical Report STR-7,
15: available at http://www.grycap.upv.es/slepc.
17: [2] G.W. Stewart, "A Krylov-Schur Algorithm for Large Eigenproblems",
18: SIAM J. Matrix Analysis and App., 23(3), pp. 601-614, 2001.
20: Last update: Feb 2009
22: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
23: SLEPc - Scalable Library for Eigenvalue Problem Computations
24: Copyright (c) 2002-2009, Universidad Politecnica de Valencia, Spain
26: This file is part of SLEPc.
27:
28: SLEPc is free software: you can redistribute it and/or modify it under the
29: terms of version 3 of the GNU Lesser General Public License as published by
30: the Free Software Foundation.
32: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
33: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
34: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
35: more details.
37: You should have received a copy of the GNU Lesser General Public License
38: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
39: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40: */
42: #include private/epsimpl.h
43: #include slepcblaslapack.h
47: /*
48: EPSTranslateHarmonic - Computes a translation of the Krylov decomposition
49: in order to perform a harmonic extraction.
51: On input:
52: S is the Rayleigh quotient (order m, leading dimension is lds)
53: tau is the translation amount
54: b is assumed to be beta*e_m^T
56: On output:
57: g = (B-sigma*eye(m))'\b
58: S is updated as S + g*b'
60: Workspace:
61: work is workspace to store a working copy of S and the pivots (int
62: of length m)
63: */
64: PetscErrorCode EPSTranslateHarmonic(PetscInt m_,PetscScalar *S,PetscInt lds,PetscScalar tau,PetscScalar beta,PetscScalar *g,PetscScalar *work)
65: {
66: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
68: SETERRQ(PETSC_ERR_SUP,"GETRF,GETRS - Lapack routines are unavailable.");
69: #else
71: PetscInt i,j;
72: PetscBLASInt info,m,one = 1;
73: PetscScalar *B = work;
74: PetscBLASInt *ipiv = (PetscBLASInt*)(work+m_*m_);
77: m = PetscBLASIntCast(m_);
78: /* Copy S to workspace B */
79: for (i=0;i<m;i++)
80: for (j=0;j<m;j++)
81: B[i+j*m] = S[i+j*lds];
82: /* Vector g initialy stores b */
83: PetscMemzero(g,m*sizeof(PetscScalar));
84: g[m-1] = beta;
85:
86: /* g = (B-sigma*eye(m))'\b */
87: for (i=0;i<m;i++)
88: B[i+i*m] -= tau;
89: LAPACKgetrf_(&m,&m,B,&m,ipiv,&info);
90: if (info<0) SETERRQ(PETSC_ERR_LIB,"Bad argument to LU factorization");
91: if (info>0) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
92: PetscLogFlops(2*m*m*m/3);
93: LAPACKgetrs_("C",&m,&one,B,&m,ipiv,g,&m,&info);
94: if (info) SETERRQ(PETSC_ERR_LIB,"GETRS - Bad solve");
95: PetscLogFlops(2*m*m-m);
97: /* S = S + g*b' */
98: for (i=0;i<m;i++)
99: S[i+(m-1)*lds] = S[i+(m-1)*lds] + g[i]*beta;
101: return(0);
102: #endif
103: }
107: /*
108: EPSRecoverHarmonic - Computes a translation of the truncated Krylov
109: decomposition in order to recover the original non-translated state
111: On input:
112: S is the truncated Rayleigh quotient (size n, leading dimension m)
113: k and l indicate the active columns of S
114: [U, u] is the basis of the Krylov subspace
115: g is the vector computed in the original translation
116: Q is the similarity transformation used to reduce to sorted Schur form
118: On output:
119: S is updated as S + g*b'
120: u is re-orthonormalized with respect to U
121: b is re-scaled
122: g is destroyed
124: Workspace:
125: ghat is workspace to store a vector of length n
126: */
127: PetscErrorCode EPSRecoverHarmonic(PetscScalar *S,PetscInt n_,PetscInt k,PetscInt l,PetscInt m_,PetscScalar *g,PetscScalar *Q,Vec *U,Vec u,PetscScalar *ghat)
128: {
131: PetscBLASInt one=1,ncol=k+l,n,m;
132: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
133: PetscReal gamma,gnorm;
134: PetscBLASInt i,j;
137: n = PetscBLASIntCast(n_);
138: m = PetscBLASIntCast(m_);
140: /* g^ = -Q(:,idx)'*g */
141: BLASgemv_("C",&n,&ncol,&dmone,Q,&n,g,&one,&dzero,ghat,&one);
143: /* S = S + g^*b' */
144: for (i=0;i<k+l;i++) {
145: for (j=k;j<k+l;j++) {
146: S[i+j*m] += ghat[i]*S[k+l+j*m];
147: }
148: }
150: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
151: BLASgemv_("N",&n,&ncol,&done,Q,&n,ghat,&one,&done,g,&one);
153: /* gamma u^ = u - U*g~ */
154: for (i=0;i<n;i++)
155: g[i] = -g[i];
156: VecMAXPY(u,n,g,U);
158: /* Renormalize u */
159: gnorm = 0.0;
160: for (i=0;i<n;i++)
161: gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
162: gamma = sqrt(1.0+gnorm);
163: VecScale(u,1.0/gamma);
165: /* b = gamma*b */
166: for (i=k;i<k+l;i++) {
167: S[i*m+k+l] *= gamma;
168: }
169: return(0);
170: }
174: /*
175: EPSProjectedKSHarmonic - Solves the projected eigenproblem in the
176: Krylov-Schur method (non-symmetric harmonic case).
178: On input:
179: l is the number of vectors kept in previous restart (0 means first restart)
180: S is the projected matrix (leading dimension is lds)
182: On output:
183: S has (real) Schur form with diagonal blocks sorted appropriately
184: Q contains the corresponding Schur vectors (order n, leading dimension n)
185: */
186: PetscErrorCode EPSProjectedKSHarmonic(EPS eps,PetscInt l,PetscScalar *S,PetscInt lds,PetscScalar *Q,PetscInt n)
187: {
191: /* Reduce S to Hessenberg form, S <- Q S Q' */
192: EPSDenseHessenberg(n,eps->nconv,S,lds,Q);
193: /* Reduce S to (quasi-)triangular form, S <- Q S Q' */
194: EPSDenseSchur(n,eps->nconv,S,lds,Q,eps->eigr,eps->eigi);
195: /* Sort the remaining columns of the Schur form */
196: EPSSortDenseSchurTarget(n,eps->nconv,S,lds,Q,eps->eigr,eps->eigi,eps->target,eps->which);
197: return(0);
198: }
202: PetscErrorCode EPSSolve_KRYLOVSCHUR_HARMONIC(EPS eps)
203: {
205: PetscInt i,k,l,lwork,nv;
206: Vec u=eps->work[1];
207: PetscScalar *S=eps->T,*Q,*g,*work;
208: PetscReal beta,gnorm;
209: PetscTruth breakdown;
212: PetscMemzero(S,eps->ncv*eps->ncv*sizeof(PetscScalar));
213: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&Q);
214: lwork = (eps->ncv+4)*eps->ncv;
215: PetscMalloc(lwork*sizeof(PetscScalar),&work);
216: PetscMalloc(eps->ncv*sizeof(PetscScalar),&g);
218: /* Get the starting Arnoldi vector */
219: EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
220: l = 0;
221:
222: /* Restart loop */
223: while (eps->reason == EPS_CONVERGED_ITERATING) {
224: eps->its++;
226: /* Compute an nv-step Arnoldi factorization */
227: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
228: EPSBasicArnoldi(eps,PETSC_FALSE,S,eps->ncv,eps->V,eps->nconv+l,&nv,u,&beta,&breakdown);
229: VecScale(u,1.0/beta);
231: /* Compute translation of Krylov decomposition */
232: EPSTranslateHarmonic(nv,S,eps->ncv,eps->target,(PetscScalar)beta,g,work);
234: /* Solve projected problem and compute residual norm estimates */
235: EPSProjectedKSHarmonic(eps,l,S,eps->ncv,Q,nv);
236: ArnoldiResiduals(S,eps->ncv,Q,beta,eps->nconv,nv,eps->eigr,eps->eigi,eps->errest,work);
238: /* Fix residual norms */
239: gnorm = 0.0;
240: for (i=0;i<nv;i++)
241: gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
242: for (i=eps->nconv;i<nv;i++)
243: eps->errest[i] *= sqrt(1.0+gnorm);
245: /* Check convergence */
246: k = eps->nconv;
247: while (k<nv && eps->errest[k]<eps->tol) k++;
248: if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
249: if (k >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
250:
251: /* Update l */
252: if (eps->reason != EPS_CONVERGED_ITERATING || breakdown) l = 0;
253: else {
254: l = (nv-k)/2;
255: #if !defined(PETSC_USE_COMPLEX)
256: if (S[(k+l-1)*(eps->ncv+1)+1] != 0.0) {
257: if (k+l<nv-1) l = l+1;
258: else l = l-1;
259: }
260: #endif
261: }
262:
263: if (eps->reason == EPS_CONVERGED_ITERATING) {
264: if (breakdown) {
265: /* Start a new Arnoldi factorization */
266: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%i norm=%g)\n",eps->its,beta);
267: EPSGetStartVector(eps,k,eps->V[k],&breakdown);
268: if (breakdown) {
269: eps->reason = EPS_DIVERGED_BREAKDOWN;
270: PetscInfo(eps,"Unable to generate more start vectors\n");
271: }
272: } else {
273: /* Prepare the Rayleigh quotient for restart */
274: for (i=k;i<k+l;i++) {
275: S[i*eps->ncv+k+l] = Q[(i+1)*nv-1]*beta;
276: }
277: EPSRecoverHarmonic(S,nv,k,l,eps->ncv,g,Q,eps->V,u,work);
278: }
279: }
280: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
281: SlepcUpdateVectors(nv,eps->V,eps->nconv,k+l,Q,nv,PETSC_FALSE);
282:
283: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
284: VecCopy(u,eps->V[k+l]);
285: }
286: eps->nconv = k;
288: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,nv);
289:
290: }
292: PetscFree(Q);
293: PetscFree(work);
294: PetscFree(g);
295: return(0);
296: }