Actual source code: trlanczos.c

  1: /*                       

  3:    SLEPc singular value solver: "trlanczos"

  5:    Method: Golub-Kahan-Lanczos bidiagonalization with thick-restart

  7:    Last update: Jun 2007

  9:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 10:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 11:    Copyright (c) 2002-2009, Universidad Politecnica de Valencia, Spain

 13:    This file is part of SLEPc.
 14:       
 15:    SLEPc is free software: you can redistribute it and/or modify it under  the
 16:    terms of version 3 of the GNU Lesser General Public License as published by
 17:    the Free Software Foundation.

 19:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 20:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 21:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 22:    more details.

 24:    You  should have received a copy of the GNU Lesser General  Public  License
 25:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 26:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 27: */

 29:  #include private/svdimpl.h
 30:  #include private/ipimpl.h
 31:  #include slepcblaslapack.h

 33: typedef struct {
 34:   PetscTruth oneside;
 35: } SVD_TRLANCZOS;

 39: PetscErrorCode SVDSetUp_TRLANCZOS(SVD svd)
 40: {
 41:   PetscErrorCode  ierr;
 42:   PetscInt        i,N,nloc;
 43:   PetscScalar     *pU;

 46:   SVDMatGetSize(svd,PETSC_NULL,&N);
 47:   if (svd->ncv) { /* ncv set */
 48:     if (svd->ncv<svd->nsv) SETERRQ(1,"The value of ncv must be at least nsv");
 49:   }
 50:   else if (svd->mpd) { /* mpd set */
 51:     svd->ncv = PetscMin(N,svd->nsv+svd->mpd);
 52:   }
 53:   else { /* neither set: defaults depend on nsv being small or large */
 54:     if (svd->nsv<500) svd->ncv = PetscMin(N,PetscMax(2*svd->nsv,10));
 55:     else { svd->mpd = 500; svd->ncv = PetscMin(N,svd->nsv+svd->mpd); }
 56:   }
 57:   if (!svd->mpd) svd->mpd = svd->ncv;
 58:   if (svd->ncv>svd->nsv+svd->mpd) SETERRQ(1,"The value of ncv must not be larger than nev+mpd");
 59:   if (!svd->max_it)
 60:     svd->max_it = PetscMax(N/svd->ncv,100);
 61:   if (svd->ncv!=svd->n) {
 62:     if (svd->U) {
 63:       VecGetArray(svd->U[0],&pU);
 64:       for (i=0;i<svd->n;i++) { VecDestroy(svd->U[i]);  }
 65:       PetscFree(pU);
 66:       PetscFree(svd->U);
 67:     }
 68:     PetscMalloc(sizeof(Vec)*svd->ncv,&svd->U);
 69:     SVDMatGetLocalSize(svd,&nloc,PETSC_NULL);
 70:     PetscMalloc(svd->ncv*nloc*sizeof(PetscScalar),&pU);
 71:     for (i=0;i<svd->ncv;i++) {
 72:       VecCreateMPIWithArray(((PetscObject)svd)->comm,nloc,PETSC_DECIDE,pU+i*nloc,&svd->U[i]);
 73:     }
 74:   }
 75:   return(0);
 76: }

 80: static PetscErrorCode SVDOneSideTRLanczosMGS(SVD svd,PetscReal *alpha,PetscReal *beta,PetscScalar* bb,Vec *V,Vec v,Vec* U,PetscInt nconv,PetscInt l,PetscInt n,PetscScalar* work,Vec wv,Vec wu)
 81: {
 83:   PetscReal      a,b;
 84:   PetscInt       i,k=nconv+l;

 87:   SVDMatMult(svd,PETSC_FALSE,V[k],U[k]);
 88:   if (l>0) {
 89:     VecSet(wu,0.0);
 90:     VecMAXPY(wu,l,bb,U+nconv);
 91:     VecAXPY(U[k],-1.0,wu);
 92:   }
 93:   IPNorm(svd->ip,U[k],&a);
 94:   VecScale(U[k],1.0/a);
 95:   alpha[0] = a;
 96:   for (i=k+1;i<n;i++) {
 97:     SVDMatMult(svd,PETSC_TRUE,U[i-1],V[i]);
 98:     IPOrthogonalize(svd->ip,i,PETSC_NULL,V,V[i],work,&b,PETSC_NULL,wv,PETSC_NULL);
 99:     VecScale(V[i],1.0/b);
100:     beta[i-k-1] = b;
101: 
102:     SVDMatMult(svd,PETSC_FALSE,V[i],U[i]);
103:     VecAXPY(U[i],-b,U[i-1]);
104:     IPNorm(svd->ip,U[i],&a);
105:     VecScale(U[i],1.0/a);
106:     alpha[i-k] = a;
107:   }
108:   SVDMatMult(svd,PETSC_TRUE,U[n-1],v);
109:   IPOrthogonalize(svd->ip,n,PETSC_NULL,V,v,work,&b,PETSC_NULL,wv,PETSC_NULL);
110:   beta[n-k-1] = b;
111:   return(0);
112: }

116: static PetscErrorCode SVDOneSideTRLanczosCGS(SVD svd,PetscReal *alpha,PetscReal *beta,PetscScalar* bb,Vec *V,Vec v,Vec* U,PetscInt nconv,PetscInt l,PetscInt n,PetscScalar* work,Vec wv,Vec wu)
117: {
119:   PetscReal      a,b,sum,onorm;
120:   PetscScalar    dot;
121:   PetscInt       i,j,k=nconv+l;

124:   SVDMatMult(svd,PETSC_FALSE,V[k],U[k]);
125:   if (l>0) {
126:     VecSet(wu,0.0);
127:     VecMAXPY(wu,l,bb,U+nconv);
128:     VecAXPY(U[k],-1.0,wu);
129:   }
130:   for (i=k+1;i<n;i++) {
131:     SVDMatMult(svd,PETSC_TRUE,U[i-1],V[i]);
132:     IPNormBegin(svd->ip,U[i-1],&a);
133:     if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
134:       IPInnerProductBegin(svd->ip,V[i],V[i],&dot);
135:     }
136:     IPMInnerProductBegin(svd->ip,V[i],i,V,work);
137:     IPNormEnd(svd->ip,U[i-1],&a);
138:     if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
139:       IPInnerProductEnd(svd->ip,V[i],V[i],&dot);
140:     }
141:     IPMInnerProductEnd(svd->ip,V[i],i,V,work);
142: 
143:     VecScale(U[i-1],1.0/a);
144:     VecScale(V[i],1.0/a);
145:     for (j=0;j<i;j++) work[j] = - work[j] / a;
146:     VecMAXPY(V[i],i,work,V);

148:     switch (svd->ip->orthog_ref) {
149:     case IP_ORTH_REFINE_NEVER:
150:       IPNorm(svd->ip,V[i],&b);
151:       break;
152:     case IP_ORTH_REFINE_ALWAYS:
153:       IPOrthogonalizeCGS(svd->ip,i,PETSC_NULL,V,V[i],work,PETSC_NULL,&b,wv);
154:       break;
155:     case IP_ORTH_REFINE_IFNEEDED:
156:       onorm = sqrt(PetscRealPart(dot)) / a;
157:       sum = 0.0;
158:       for (j=0;j<i;j++) {
159:         sum += PetscRealPart(work[j] * PetscConj(work[j]));
160:       }
161:       b = PetscRealPart(dot)/(a*a) - sum;
162:       if (b>0.0) b = sqrt(b);
163:       else {
164:         IPNorm(svd->ip,V[i],&b);
165:       }
166:       if (b < svd->ip->orthog_eta * onorm) {
167:         IPOrthogonalizeCGS(svd->ip,i,PETSC_NULL,V,V[i],work,PETSC_NULL,&b,wv);
168:       }
169:       break;
170:     }
171: 
172:     VecScale(V[i],1.0/b);
173: 
174:     SVDMatMult(svd,PETSC_FALSE,V[i],U[i]);
175:     VecAXPY(U[i],-b,U[i-1]);

177:     alpha[i-k-1] = a;
178:     beta[i-k-1] = b;
179:   }
180:   SVDMatMult(svd,PETSC_TRUE,U[n-1],v);
181:   IPNormBegin(svd->ip,U[n-1],&a);
182:   if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
183:     IPInnerProductBegin(svd->ip,v,v,&dot);
184:   }
185:   IPMInnerProductBegin(svd->ip,v,n,V,work);
186:   IPNormEnd(svd->ip,U[n-1],&a);
187:   if (svd->ip->orthog_ref == IP_ORTH_REFINE_IFNEEDED) {
188:     IPInnerProductEnd(svd->ip,v,v,&dot);
189:   }
190:   IPMInnerProductEnd(svd->ip,v,n,V,work);
191: 
192:   VecScale(U[n-1],1.0/a);
193:   VecScale(v,1.0/a);
194:   for (j=0;j<n;j++) work[j] = - work[j] / a;
195:   VecMAXPY(v,n,work,V);

197:   switch (svd->ip->orthog_ref) {
198:   case IP_ORTH_REFINE_NEVER:
199:     IPNorm(svd->ip,v,&b);
200:     break;
201:   case IP_ORTH_REFINE_ALWAYS:
202:     IPOrthogonalizeCGS(svd->ip,n,PETSC_NULL,V,v,work,PETSC_NULL,&b,wv);
203:     break;
204:   case IP_ORTH_REFINE_IFNEEDED:
205:     onorm = sqrt(PetscRealPart(dot)) / a;
206:     sum = 0.0;
207:     for (j=0;j<i;j++) {
208:       sum += PetscRealPart(work[j] * PetscConj(work[j]));
209:     }
210:     b = PetscRealPart(dot)/(a*a) - sum;
211:     if (b>0.0) b = sqrt(b);
212:     else {
213:       IPNorm(svd->ip,v,&b);
214:     }
215:     if (b < svd->ip->orthog_eta * onorm) {
216:       IPOrthogonalizeCGS(svd->ip,n,PETSC_NULL,V,v,work,PETSC_NULL,&b,wv);
217:     }
218:     break;
219:   }
220: 
221:   alpha[n-k-1] = a;
222:   beta[n-k-1] = b;
223:   return(0);
224: }

228: PetscErrorCode SVDSolve_TRLANCZOS(SVD svd)
229: {
231:   SVD_TRLANCZOS  *lanczos = (SVD_TRLANCZOS *)svd->data;
232:   PetscReal      *alpha,*beta,norm;
233:   PetscScalar    *b,*Q,*PT,*swork;
234:   PetscInt       i,j,k,l,m,n,nv;
235:   Vec            v,wv,wu;
236:   PetscTruth     conv;
237:   IPOrthogonalizationType orthog;
238: 
240:   /* allocate working space */
241:   PetscMalloc(sizeof(PetscReal)*svd->n,&alpha);
242:   PetscMalloc(sizeof(PetscReal)*svd->n,&beta);
243:   PetscMalloc(sizeof(PetscScalar)*svd->n,&b);
244:   PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&Q);
245:   PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&PT);
246: #if !defined(PETSC_USE_COMPLEX)
247:   if (svd->which == SVD_SMALLEST) {
248: #endif
249:     PetscMalloc(sizeof(PetscScalar)*svd->n*svd->n,&swork);
250: #if !defined(PETSC_USE_COMPLEX)
251:   } else {
252:     PetscMalloc(sizeof(PetscScalar)*svd->n,&swork);
253:   }
254: #endif
255:   VecDuplicate(svd->V[0],&v);
256:   VecDuplicate(svd->V[0],&wv);
257:   VecDuplicate(svd->U[0],&wu);
258:   IPGetOrthogonalization(svd->ip,&orthog,PETSC_NULL,PETSC_NULL);
259: 
260:   /* normalize start vector */
261:   VecCopy(svd->vec_initial,svd->V[0]);
262:   VecNormalize(svd->V[0],&norm);
263: 
264:   l = 0;
265:   while (svd->reason == SVD_CONVERGED_ITERATING) {
266:     svd->its++;

268:     /* inner loop */
269:     nv = PetscMin(svd->nconv+svd->mpd,svd->n);
270:     if (lanczos->oneside) {
271:       if (orthog == IP_MGS_ORTH) {
272:         SVDOneSideTRLanczosMGS(svd,alpha,beta,b+svd->nconv,svd->V,v,svd->U,svd->nconv,l,nv,swork,wv,wu);
273:       } else {
274:         SVDOneSideTRLanczosCGS(svd,alpha,beta,b+svd->nconv,svd->V,v,svd->U,svd->nconv,l,nv,swork,wv,wu);
275:       }
276:     } else {
277:       SVDTwoSideLanczos(svd,alpha,beta,svd->V,v,svd->U,svd->nconv+l,nv,swork,wv,wu);
278:     }
279:     VecScale(v,1.0/beta[nv-svd->nconv-l-1]);
280: 
281:     /* compute SVD of general matrix */
282:     n = nv - svd->nconv;
283:     /* first l columns */
284:     for (j=0;j<l;j++) {
285:       for (i=0;i<j;i++) Q[j*n+i] = 0.0;
286:       Q[j*n+j] = svd->sigma[svd->nconv+j];
287:       for (i=j+1;i<n;i++) Q[j*n+i] = 0.0;
288:     }
289:     /* l+1 column */
290:     for (i=0;i<l;i++) Q[l*n+i] = b[i+svd->nconv];
291:     Q[l*n+l] = alpha[0];
292:     for (i=l+1;i<n;i++) Q[l*n+i] = 0.0;
293:     /* rest of matrix */
294:     for (j=l+1;j<n;j++) {
295:       for (i=0;i<j-1;i++) Q[j*n+i] = 0.0;
296:       Q[j*n+j-1] = beta[j-l-1];
297:       Q[j*n+j] = alpha[j-l];
298:       for (i=j+1;i<n;i++) Q[j*n+i] = 0.0;
299:     }
300:     SVDDense(n,n,Q,alpha,PETSC_NULL,PT);

302:     /* compute error estimates */
303:     k = 0;
304:     conv = PETSC_TRUE;
305:     for (i=svd->nconv;i<nv;i++) {
306:       if (svd->which == SVD_SMALLEST) j = n-i+svd->nconv-1;
307:       else j = i-svd->nconv;
308:       svd->sigma[i] = alpha[j];
309:       b[i] = Q[j*n+n-1]*beta[n-l-1];
310:       svd->errest[i] = PetscAbsScalar(b[i]);
311:       if (alpha[j] > svd->tol) svd->errest[i] /= alpha[j];
312:       if (conv) {
313:         if (svd->errest[i] < svd->tol) k++;
314:         else conv = PETSC_FALSE;
315:       }
316:     }
317: 
318:     /* check convergence and update l */
319:     if (svd->its >= svd->max_it) svd->reason = SVD_DIVERGED_ITS;
320:     if (svd->nconv+k >= svd->nsv) svd->reason = SVD_CONVERGED_TOL;
321:     if (svd->reason != SVD_CONVERGED_ITERATING) l = 0;
322:     else l = PetscMax((nv - svd->nconv - k) / 2,0);
323: 
324:     /* compute converged singular vectors and restart vectors*/
325: #if !defined(PETSC_USE_COMPLEX)
326:     if (svd->which == SVD_SMALLEST) {
327: #endif
328:     for (i=0;i<k+l;i++) {
329:       if (svd->which == SVD_SMALLEST) j = n-i-1;
330:       else j = i;
331:       for (m=0;m<n;m++) swork[j*n+m] = PT[m*n+j];
332:     }
333:     SlepcUpdateVectors(n,svd->V+svd->nconv,0,k+l,swork,n,PETSC_FALSE);
334:     for (i=0;i<k+l;i++) {
335:       if (svd->which == SVD_SMALLEST) j = n-i-1;
336:       else j = i;
337:       for (m=0;m<n;m++) swork[j*n+m] = Q[j*n+m];
338:     }
339:     SlepcUpdateVectors(n,svd->U+svd->nconv,0,k+l,swork,n,PETSC_FALSE);
340: #if !defined(PETSC_USE_COMPLEX)
341:     } else {
342:       SlepcUpdateVectors(n,svd->V+svd->nconv,0,k+l,PT,n,PETSC_TRUE);
343:       SlepcUpdateVectors(n,svd->U+svd->nconv,0,k+l,Q,n,PETSC_FALSE);
344:     }
345: #endif
346: 
347:     /* copy the last vector to be the next initial vector */
348:     if (svd->reason == SVD_CONVERGED_ITERATING) {
349:       VecCopy(v,svd->V[svd->nconv+k+l]);
350:     }
351: 
352:     svd->nconv += k;
353:     SVDMonitor(svd,svd->its,svd->nconv,svd->sigma,svd->errest,nv);
354:   }
355: 
356:   /* orthonormalize U columns in one side method */
357:   if (lanczos->oneside) {
358:     for (i=0;i<svd->nconv;i++) {
359:       IPOrthogonalize(svd->ip,i,PETSC_NULL,svd->U,svd->U[i],PETSC_NULL,&norm,PETSC_NULL,PETSC_NULL,PETSC_NULL);
360:       VecScale(svd->U[i],1.0/norm);
361:     }
362:   }
363: 
364:   /* free working space */
365:   VecDestroy(v);
366:   VecDestroy(wv);
367:   VecDestroy(wu);

369:   PetscFree(alpha);
370:   PetscFree(beta);
371:   PetscFree(b);
372:   PetscFree(Q);
373:   PetscFree(PT);
374:   PetscFree(swork);
375:   return(0);
376: }

380: PetscErrorCode SVDSetFromOptions_TRLANCZOS(SVD svd)
381: {
383:   SVD_TRLANCZOS  *lanczos = (SVD_TRLANCZOS *)svd->data;

386:   PetscOptionsBegin(((PetscObject)svd)->comm,((PetscObject)svd)->prefix,"TRLANCZOS Singular Value Solver Options","SVD");
387:   PetscOptionsTruth("-svd_trlanczos_oneside","Lanczos one-side reorthogonalization","SVDTRLanczosSetOneSide",PETSC_FALSE,&lanczos->oneside,PETSC_NULL);
388:   PetscOptionsEnd();
389:   return(0);
390: }

395: PetscErrorCode SVDTRLanczosSetOneSide_TRLANCZOS(SVD svd,PetscTruth oneside)
396: {
397:   SVD_TRLANCZOS *lanczos = (SVD_TRLANCZOS *)svd->data;

400:   lanczos->oneside = oneside;
401:   return(0);
402: }

407: /*@
408:    SVDTRLanczosSetOneSide - Indicate if the variant of the Lanczos method 
409:    to be used is one-sided or two-sided.

411:    Collective on SVD

413:    Input Parameters:
414: +  svd     - singular value solver
415: -  oneside - boolean flag indicating if the method is one-sided or not

417:    Options Database Key:
418: .  -svd_trlanczos_oneside <boolean> - Indicates the boolean flag

420:    Note:
421:    By default, a two-sided variant is selected, which is sometimes slightly
422:    more robust. However, the one-sided variant is faster because it avoids 
423:    the orthogonalization associated to left singular vectors. 

425:    Level: advanced

427: .seealso: SVDLanczosSetOneSide()
428: @*/
429: PetscErrorCode SVDTRLanczosSetOneSide(SVD svd,PetscTruth oneside)
430: {
431:   PetscErrorCode ierr, (*f)(SVD,PetscTruth);

435:   PetscObjectQueryFunction((PetscObject)svd,"SVDTRLanczosSetOneSide_C",(void (**)())&f);
436:   if (f) {
437:     (*f)(svd,oneside);
438:   }
439:   return(0);
440: }

444: PetscErrorCode SVDView_TRLANCZOS(SVD svd,PetscViewer viewer)
445: {
447:   SVD_TRLANCZOS  *lanczos = (SVD_TRLANCZOS *)svd->data;

450:   PetscViewerASCIIPrintf(viewer,"Lanczos reorthogonalization: %s\n",lanczos->oneside ? "one-side" : "two-side");
451:   return(0);
452: }

457: PetscErrorCode SVDCreate_TRLANCZOS(SVD svd)
458: {
460:   SVD_TRLANCZOS  *lanczos;

463:   PetscNew(SVD_TRLANCZOS,&lanczos);
464:   PetscLogObjectMemory(svd,sizeof(SVD_TRLANCZOS));
465:   svd->data                = (void *)lanczos;
466:   svd->ops->setup          = SVDSetUp_TRLANCZOS;
467:   svd->ops->solve          = SVDSolve_TRLANCZOS;
468:   svd->ops->destroy        = SVDDestroy_Default;
469:   svd->ops->setfromoptions = SVDSetFromOptions_TRLANCZOS;
470:   svd->ops->view           = SVDView_TRLANCZOS;
471:   lanczos->oneside         = PETSC_FALSE;
472:   PetscObjectComposeFunctionDynamic((PetscObject)svd,"SVDTRLanczosSetOneSide_C","SVDTRLanczosSetOneSide_TRLANCZOS",SVDTRLanczosSetOneSide_TRLANCZOS);
473:   return(0);
474: }