Actual source code: trlanczos.c
1: /*
3: SLEPc singular value solver: "trlanczos"
5: Method: Golub-Kahan-Lanczos bidiagonalization with thick-restart
7: Last update: Jun 2007
9: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
10: SLEPc - Scalable Library for Eigenvalue Problem Computations
11: Copyright (c) 2002-2009, Universidad Politecnica de Valencia, Spain
13: This file is part of SLEPc.
14:
15: SLEPc is free software: you can redistribute it and/or modify it under the
16: terms of version 3 of the GNU Lesser General Public License as published by
17: the Free Software Foundation.
19: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
20: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
22: more details.
24: You should have received a copy of the GNU Lesser General Public License
25: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
26: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
27: */
29: #include private/svdimpl.h
30: #include private/ipimpl.h
31: #include slepcblaslapack.h
33: typedef struct {
34: PetscTruth oneside;
35: } SVD_TRLANCZOS;
39: PetscErrorCode SVDSetUp_TRLANCZOS(SVD svd)
40: {
41: PetscErrorCode ierr;
42: PetscInt i,N,nloc;
43: PetscScalar *pU;
46: SVDMatGetSize(svd,PETSC_NULL,&N);
47: if (svd->ncv) { /* ncv set */
48: if (svd->ncv<svd->nsv) SETERRQ(1,"The value of ncv must be at least nsv");
49: }
50: else if (svd->mpd) { /* mpd set */
51: svd->ncv = PetscMin(N,svd->nsv+svd->mpd);
52: }
53: else { /* neither set: defaults depend on nsv being small or large */
54: if (svd->nsv<500) svd->ncv = PetscMin(N,PetscMax(2*svd->nsv,10));
55: else { svd->mpd = 500; svd->ncv = PetscMin(N,svd->nsv+svd->mpd); }
56: }
57: if (!svd->mpd) svd->mpd = svd->ncv;
58: if (svd->ncv>svd->nsv+svd->mpd) SETERRQ(1,"The value of ncv must not be larger than nev+mpd");
59: if (!svd->max_it)
60: svd->max_it = PetscMax(N/svd->ncv,100);
61: if (svd->ncv!=svd->n) {
62: if (svd->U) {
63: VecGetArray(svd->U[0],&pU);
64: for (i=0;i<svd->n;i++) { VecDestroy(svd->U[i]); }
65: PetscFree(pU);
66: PetscFree(svd->U);
67: }
68: PetscMalloc(sizeof(Vec)*svd->ncv,&svd->U);
69: SVDMatGetLocalSize(svd,&nloc,PETSC_NULL);
70: PetscMalloc(svd->ncv*nloc*sizeof(PetscScalar),&pU);
71: for (i=0;i<svd->ncv;i++) {
72: VecCreateMPIWithArray(((PetscObject)svd)->comm,nloc,PETSC_DECIDE,pU+i*nloc,&svd->U[i]);
73: }
74: }
75: return(0);
76: }
80: static PetscErrorCode SVDOneSideTRLanczosMGS(SVD svd,PetscReal *alpha,PetscReal *beta,PetscScalar* bb,Vec *V,Vec v,Vec* U,PetscInt nconv,PetscInt l,PetscInt n,PetscScalar* work,Vec wv,Vec wu)
81: {
83: PetscReal a,b;
84: PetscInt i,k=nconv+l;
87: SVDMatMult(svd,PETSC_FALSE,V[k],U[k]);
88: if (l>0) {
89: VecSet(wu,0.0);
90: VecMAXPY(wu,l,bb,U+nconv);
91: VecAXPY(U[k],-1.0,wu);
92: }
93: IPNorm(svd->ip,U[k],&a);
94: VecScale(U[k],1.0/a);
95: alpha[0] = a;
96: for (i=k+1;i<n;i++) {
97: SVDMatMult(svd,PETSC_TRUE,U[i-1],V[i]);
98: IPOrthogonalize(svd->ip,i,PETSC_NULL,V,V[i],work,&b,PETSC_NULL,wv,PETSC_NULL);
99: VecScale(V[i],1.0/b);
100: beta[i-k-1] = b;
101:
102: SVDMatMult(svd,PETSC_FALSE,V[i],U[i]);
103: VecAXPY(U[i],-b,U[i-1]);
104: IPNorm(svd->ip,U[i],&a);
105: VecScale(U[i],1.0/a);
106: alpha[i-k] = a;
107: }
108: SVDMatMult(svd,PETSC_TRUE,U[n-1],v);
109: IPOrthogonalize(svd->ip,n,PETSC_NULL,V,v,work,&b,PETSC_NULL,wv,PETSC_NULL);
110: beta[n-k-1] = b;
111: return(0);
112: }
116: static PetscErrorCode SVDOneSideTRLanczosCGS(SVD svd,PetscReal *alpha,PetscReal *beta,PetscScalar* bb,Vec *V,Vec v,Vec* U,PetscInt nconv,PetscInt l,PetscInt n,PetscScalar* work,Vec wv,Vec wu)
117: {
119: PetscReal a,b,sum,onorm;
120: PetscScalar dot;
121: PetscInt i,j,k=nconv+l;
124: SVDMatMult(svd,PETSC_FALSE,V[k],U[k]);
125: if (l>0) {
126: VecSet(wu,0.0);
127: VecMAXPY(wu,l,bb,U+nconv);
128: VecAXPY(U[k],-1.0,wu);
129: }
130: for (i=k+1;i<n;i++) {
131: SVDMatMult(svd,PETSC_TRUE,U[i-1],V[i]);
132: IPNormBegin(svd->ip,U[i-1],&a);
133: if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
134: IPInnerProductBegin(svd->ip,V[i],V[i],&dot);
135: }
136: IPMInnerProductBegin(svd->ip,V[i],i,V,work);
137: IPNormEnd(svd->ip,U[i-1],&a);
138: if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
139: IPInnerProductEnd(svd->ip,V[i],V[i],&dot);
140: }
141: IPMInnerProductEnd(svd->ip,V[i],i,V,work);
142:
143: VecScale(U[i-1],1.0/a);
144: VecScale(V[i],1.0/a);
145: for (j=0;j<i;j++) work[j] = - work[j] / a;
146: VecMAXPY(V[i],i,work,V);
148: switch (svd->ip->orthog_ref) {
149: case IP_ORTH_REFINE_NEVER:
150: IPNorm(svd->ip,V[i],&b);
151: break;
152: case IP_ORTH_REFINE_ALWAYS:
153: IPOrthogonalizeCGS(svd->ip,i,PETSC_NULL,V,V[i],work,PETSC_NULL,&b,wv);
154: break;
155: case IP_ORTH_REFINE_IFNEEDED:
156: onorm = sqrt(PetscRealPart(dot)) / a;
157: sum = 0.0;
158: for (j=0;j<i;j++) {
159: sum += PetscRealPart(work[j] * PetscConj(work[j]));
160: }
161: b = PetscRealPart(dot)/(a*a) - sum;
162: if (b>0.0) b = sqrt(b);
163: else {
164: IPNorm(svd->ip,V[i],&b);
165: }
166: if (b < svd->ip->orthog_eta * onorm) {
167: IPOrthogonalizeCGS(svd->ip,i,PETSC_NULL,V,V[i],work,PETSC_NULL,&b,wv);
168: }
169: break;
170: }
171:
172: VecScale(V[i],1.0/b);
173:
174: SVDMatMult(svd,PETSC_FALSE,V[i],U[i]);
175: VecAXPY(U[i],-b,U[i-1]);
177: alpha[i-k-1] = a;
178: beta[i-k-1] = b;
179: }
180: SVDMatMult(svd,PETSC_TRUE,U[n-1],v);
181: IPNormBegin(svd->ip,U[n-1],&a);
182: if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
183: IPInnerProductBegin(svd->ip,v,v,&dot);
184: }
185: IPMInnerProductBegin(svd->ip,v,n,V,work);
186: IPNormEnd(svd->ip,U[n-1],&a);
187: if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
188: IPInnerProductEnd(svd->ip,v,v,&dot);
189: }
190: IPMInnerProductEnd(svd->ip,v,n,V,work);
191:
192: VecScale(U[n-1],1.0/a);
193: VecScale(v,1.0/a);
194: for (j=0;j<n;j++) work[j] = - work[j] / a;
195: VecMAXPY(v,n,work,V);
197: switch (svd->ip->orthog_ref) {
198: case IP_ORTH_REFINE_NEVER:
199: IPNorm(svd->ip,v,&b);
200: break;
201: case IP_ORTH_REFINE_ALWAYS:
202: IPOrthogonalizeCGS(svd->ip,n,PETSC_NULL,V,v,work,PETSC_NULL,&b,wv);
203: break;
204: case IP_ORTH_REFINE_IFNEEDED:
205: onorm = sqrt(PetscRealPart(dot)) / a;
206: sum = 0.0;
207: for (j=0;j<i;j++) {
208: sum += PetscRealPart(work[j] * PetscConj(work[j]));
209: }
210: b = PetscRealPart(dot)/(a*a) - sum;
211: if (b>0.0) b = sqrt(b);
212: else {
213: IPNorm(svd->ip,v,&b);
214: }
215: if (b < svd->ip->orthog_eta * onorm) {
216: IPOrthogonalizeCGS(svd->ip,n,PETSC_NULL,V,v,work,PETSC_NULL,&b,wv);
217: }
218: break;
219: }
220:
221: alpha[n-k-1] = a;
222: beta[n-k-1] = b;
223: return(0);
224: }
228: PetscErrorCode SVDSolve_TRLANCZOS(SVD svd)
229: {
231: SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS *)svd->data;
232: PetscReal *alpha,*beta,norm;
233: PetscScalar *b,*Q,*PT,*swork;
234: PetscInt i,j,k,l,m,n,nv;
235: Vec v,wv,wu;
236: PetscTruth conv;
237: IPOrthogonalizationType orthog;
238:
240: /* allocate working space */
241: PetscMalloc(sizeof(PetscReal)*svd->n,&alpha);
242: PetscMalloc(sizeof(PetscReal)*svd->n,&beta);
243: PetscMalloc(sizeof(PetscScalar)*svd->n,&b);
244: PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&Q);
245: PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&PT);
246: #if !defined(PETSC_USE_COMPLEX)
247: if (svd->which == SVD_SMALLEST) {
248: #endif
249: PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&swork);
250: #if !defined(PETSC_USE_COMPLEX)
251: } else {
252: PetscMalloc(sizeof(PetscScalar)*svd->n,&swork);
253: }
254: #endif
255: VecDuplicate(svd->V[0],&v);
256: VecDuplicate(svd->V[0],&wv);
257: VecDuplicate(svd->U[0],&wu);
258: IPGetOrthogonalization(svd->ip,&orthog,PETSC_NULL,PETSC_NULL);
259:
260: /* normalize start vector */
261: VecCopy(svd->vec_initial,svd->V[0]);
262: VecNormalize(svd->V[0],&norm);
263:
264: l = 0;
265: while (svd->reason == SVD_CONVERGED_ITERATING) {
266: svd->its++;
268: /* inner loop */
269: nv = PetscMin(svd->nconv+svd->mpd,svd->n);
270: if (lanczos->oneside) {
271: if (orthog == IP_MGS_ORTH) {
272: SVDOneSideTRLanczosMGS(svd,alpha,beta,b+svd->nconv,svd->V,v,svd->U,svd->nconv,l,nv,swork,wv,wu);
273: } else {
274: SVDOneSideTRLanczosCGS(svd,alpha,beta,b+svd->nconv,svd->V,v,svd->U,svd->nconv,l,nv,swork,wv,wu);
275: }
276: } else {
277: SVDTwoSideLanczos(svd,alpha,beta,svd->V,v,svd->U,svd->nconv+l,nv,swork,wv,wu);
278: }
279: VecScale(v,1.0/beta[nv-svd->nconv-l-1]);
280:
281: /* compute SVD of general matrix */
282: n = nv - svd->nconv;
283: /* first l columns */
284: for (j=0;j<l;j++) {
285: for (i=0;i<j;i++) Q[j*n+i] = 0.0;
286: Q[j*n+j] = svd->sigma[svd->nconv+j];
287: for (i=j+1;i<n;i++) Q[j*n+i] = 0.0;
288: }
289: /* l+1 column */
290: for (i=0;i<l;i++) Q[l*n+i] = b[i+svd->nconv];
291: Q[l*n+l] = alpha[0];
292: for (i=l+1;i<n;i++) Q[l*n+i] = 0.0;
293: /* rest of matrix */
294: for (j=l+1;j<n;j++) {
295: for (i=0;i<j-1;i++) Q[j*n+i] = 0.0;
296: Q[j*n+j-1] = beta[j-l-1];
297: Q[j*n+j] = alpha[j-l];
298: for (i=j+1;i<n;i++) Q[j*n+i] = 0.0;
299: }
300: SVDDense(n,n,Q,alpha,PETSC_NULL,PT);
302: /* compute error estimates */
303: k = 0;
304: conv = PETSC_TRUE;
305: for (i=svd->nconv;i<nv;i++) {
306: if (svd->which == SVD_SMALLEST) j = n-i+svd->nconv-1;
307: else j = i-svd->nconv;
308: svd->sigma[i] = alpha[j];
309: b[i] = Q[j*n+n-1]*beta[n-l-1];
310: svd->errest[i] = PetscAbsScalar(b[i]);
311: if (alpha[j] > svd->tol) svd->errest[i] /= alpha[j];
312: if (conv) {
313: if (svd->errest[i] < svd->tol) k++;
314: else conv = PETSC_FALSE;
315: }
316: }
317:
318: /* check convergence and update l */
319: if (svd->its >= svd->max_it) svd->reason = SVD_DIVERGED_ITS;
320: if (svd->nconv+k >= svd->nsv) svd->reason = SVD_CONVERGED_TOL;
321: if (svd->reason != SVD_CONVERGED_ITERATING) l = 0;
322: else l = PetscMax((nv - svd->nconv - k) / 2,0);
323:
324: /* compute converged singular vectors and restart vectors*/
325: #if !defined(PETSC_USE_COMPLEX)
326: if (svd->which == SVD_SMALLEST) {
327: #endif
328: for (i=0;i<k+l;i++) {
329: if (svd->which == SVD_SMALLEST) j = n-i-1;
330: else j = i;
331: for (m=0;m<n;m++) swork[j*n+m] = PT[m*n+j];
332: }
333: SlepcUpdateVectors(n,svd->V+svd->nconv,0,k+l,swork,n,PETSC_FALSE);
334: for (i=0;i<k+l;i++) {
335: if (svd->which == SVD_SMALLEST) j = n-i-1;
336: else j = i;
337: for (m=0;m<n;m++) swork[j*n+m] = Q[j*n+m];
338: }
339: SlepcUpdateVectors(n,svd->U+svd->nconv,0,k+l,swork,n,PETSC_FALSE);
340: #if !defined(PETSC_USE_COMPLEX)
341: } else {
342: SlepcUpdateVectors(n,svd->V+svd->nconv,0,k+l,PT,n,PETSC_TRUE);
343: SlepcUpdateVectors(n,svd->U+svd->nconv,0,k+l,Q,n,PETSC_FALSE);
344: }
345: #endif
346:
347: /* copy the last vector to be the next initial vector */
348: if (svd->reason == SVD_CONVERGED_ITERATING) {
349: VecCopy(v,svd->V[svd->nconv+k+l]);
350: }
351:
352: svd->nconv += k;
353: SVDMonitor(svd,svd->its,svd->nconv,svd->sigma,svd->errest,nv);
354: }
355:
356: /* orthonormalize U columns in one side method */
357: if (lanczos->oneside) {
358: for (i=0;i<svd->nconv;i++) {
359: IPOrthogonalize(svd->ip,i,PETSC_NULL,svd->U,svd->U[i],PETSC_NULL,&norm,PETSC_NULL,PETSC_NULL,PETSC_NULL);
360: VecScale(svd->U[i],1.0/norm);
361: }
362: }
363:
364: /* free working space */
365: VecDestroy(v);
366: VecDestroy(wv);
367: VecDestroy(wu);
369: PetscFree(alpha);
370: PetscFree(beta);
371: PetscFree(b);
372: PetscFree(Q);
373: PetscFree(PT);
374: PetscFree(swork);
375: return(0);
376: }
380: PetscErrorCode SVDSetFromOptions_TRLANCZOS(SVD svd)
381: {
383: SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS *)svd->data;
386: PetscOptionsBegin(((PetscObject)svd)->comm,((PetscObject)svd)->prefix,"TRLANCZOS Singular Value Solver Options","SVD");
387: PetscOptionsTruth("-svd_trlanczos_oneside","Lanczos one-side reorthogonalization","SVDTRLanczosSetOneSide",PETSC_FALSE,&lanczos->oneside,PETSC_NULL);
388: PetscOptionsEnd();
389: return(0);
390: }
395: PetscErrorCode SVDTRLanczosSetOneSide_TRLANCZOS(SVD svd,PetscTruth oneside)
396: {
397: SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS *)svd->data;
400: lanczos->oneside = oneside;
401: return(0);
402: }
407: /*@
408: SVDTRLanczosSetOneSide - Indicate if the variant of the Lanczos method
409: to be used is one-sided or two-sided.
411: Collective on SVD
413: Input Parameters:
414: + svd - singular value solver
415: - oneside - boolean flag indicating if the method is one-sided or not
417: Options Database Key:
418: . -svd_trlanczos_oneside <boolean> - Indicates the boolean flag
420: Note:
421: By default, a two-sided variant is selected, which is sometimes slightly
422: more robust. However, the one-sided variant is faster because it avoids
423: the orthogonalization associated to left singular vectors.
425: Level: advanced
427: .seealso: SVDLanczosSetOneSide()
428: @*/
429: PetscErrorCode SVDTRLanczosSetOneSide(SVD svd,PetscTruth oneside)
430: {
431: PetscErrorCode ierr, (*f)(SVD,PetscTruth);
435: PetscObjectQueryFunction((PetscObject)svd,"SVDTRLanczosSetOneSide_C",(void (**)())&f);
436: if (f) {
437: (*f)(svd,oneside);
438: }
439: return(0);
440: }
444: PetscErrorCode SVDView_TRLANCZOS(SVD svd,PetscViewer viewer)
445: {
447: SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS *)svd->data;
450: PetscViewerASCIIPrintf(viewer,"Lanczos reorthogonalization: %s\n",lanczos->oneside ? "one-side" : "two-side");
451: return(0);
452: }
457: PetscErrorCode SVDCreate_TRLANCZOS(SVD svd)
458: {
460: SVD_TRLANCZOS *lanczos;
463: PetscNew(SVD_TRLANCZOS,&lanczos);
464: PetscLogObjectMemory(svd,sizeof(SVD_TRLANCZOS));
465: svd->data = (void *)lanczos;
466: svd->ops->setup = SVDSetUp_TRLANCZOS;
467: svd->ops->solve = SVDSolve_TRLANCZOS;
468: svd->ops->destroy = SVDDestroy_Default;
469: svd->ops->setfromoptions = SVDSetFromOptions_TRLANCZOS;
470: svd->ops->view = SVDView_TRLANCZOS;
471: lanczos->oneside = PETSC_FALSE;
472: PetscObjectComposeFunctionDynamic((PetscObject)svd,"SVDTRLanczosSetOneSide_C","SVDTRLanczosSetOneSide_TRLANCZOS",SVDTRLanczosSetOneSide_TRLANCZOS);
473: return(0);
474: }