Actual source code: dqds.c

slepc-3.7.1 2016-05-27
Report Typos and Errors
  1: /*
  2:    DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
  3:    Based on Matlab code from Carla Ferreira.

  5:    References:

  7:        [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
  8:            eigenvalue problem", preprint, 2012.

 10:        [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
 11:            Thesis, University of Minho, 2007.

 13:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 14:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 15:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

 17:    This file is part of SLEPc.

 19:    SLEPc is free software: you can redistribute it and/or modify it under  the
 20:    terms of version 3 of the GNU Lesser General Public License as published by
 21:    the Free Software Foundation.

 23:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 24:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 25:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 26:    more details.

 28:    You  should have received a copy of the GNU Lesser General  Public  License
 29:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 30:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 31: */
 32: #include <slepc/private/dsimpl.h>
 33: #include <slepcblaslapack.h>

 37: /*
 38:   INPUT:
 39:     a ---- diagonal of J
 40:     b ---- subdiagonal of J;
 41:     the superdiagonal of J is all 1's

 43:   OUTPUT:
 44:     For an eigenvalue lambda of J we have:
 45:       gl=<real(lambda)<=gr
 46:       -sigma=<imag(lambda)<=sigma
 47: */
 48: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
 49: {
 50:   PetscInt  i;
 51:   PetscReal b0,b1,rad;

 54:   /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
 55:    C_bal is the balanced form of C */
 56:   /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
 57:   *sigma = 0.0;
 58:   b0 = 0.0;
 59:   for (i=0;i<n-1;i++) {
 60:     if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
 61:     else b1 = 0.0;
 62:     *sigma = PetscMax(*sigma,b1+b0);
 63:     b0 = b1;
 64:   }
 65:   *sigma = PetscMax(*sigma,b0);
 66:   /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
 67:   rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
 68:   *gr = a[0]+rad;
 69:   *gl = a[0]-rad;
 70:   b0 = rad;
 71:   for (i=1;i<n-1;i++) {
 72:     b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
 73:     rad = b0+b1;
 74:     *gr = PetscMax(*gr,a[i]+rad);
 75:     *gl = PetscMin(*gl,a[i]-rad);
 76:     b0 = b1;
 77:   }
 78:   rad = b0;
 79:   *gr = PetscMax(*gr,a[n-1]+rad);
 80:   *gl = PetscMin(*gl,a[n-1]-rad);
 81:   return(0);
 82: }

 86: /*
 87:   INPUT:
 88:     a  - vector with the diagonal elements
 89:     b  - vector with the subdiagonal elements
 90:     gl - Gersgorin left bound (real axis)
 91:     gr - Gersgorin right bound (real axis)
 92:   OUTPUT:
 93:     eigvalue - multiple eigenvalue (if there is an eigenvalue)
 94:     m        - its multiplicity    (m=0 if there isn't a multiple eigenvalue)
 95:     X        - matrix of generalized eigenvectors
 96:     shift
 97:     dim(work)=5*n+4
 98: */
 99: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *work)
100: {

103:   PetscReal      mu,tol,*a1,*y,*yp,*x,*xp;
104:   PetscInt       i,k;

107:   *m = 0;
108:   mu = 0.0;
109:   for (i=0;i<n;i++) mu += a[i];
110:   mu /= n;
111:   tol = n*PETSC_MACHINE_EPSILON*(gr-gl);
112:   a1 = work; /* size n */
113:   y = work+n; /* size n+1 */
114:   yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
115:   x = yp+n+1; /* size n+1 */
116:   xp = x+n+1; /* size n+1 */
117:   for (i=0;i<n;i++) a1[i] = mu-a[i];
118:   x[0] = 1;
119:   xp[0] = 0;
120:   x[1] = a1[0];
121:   xp[1] = 1;
122:   for (i=1;i<n;i++) {
123:     x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
124:     xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
125:   }
126:   *shift = mu;
127:   if (PetscAbsReal(x[n])<tol) {
128:     /* mu is an eigenvalue */
129:     *m = *m+1;
130:     if (PetscAbsReal(xp[n])<tol) {
131:       /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
132:       k = 0;
133:       while (PetscAbsReal(xp[n])<tol && k<n-1) {
134:         PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
135:         PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
136:         x[k] = 0.0;
137:         k++;
138:         x[k] = 1.0;
139:         xp[k] = 0.0;
140:         x[k+1] = a1[k] + y[k];
141:         xp[k+1] = 1+yp[k];
142:         for (i=k+1;i<n;i++) {
143:           x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
144:           xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
145:         }
146:         *m = *m+1;
147:       }
148:     }
149:   }
150: /*
151:   When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu

153:   Need to check for overflow!

155:   After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
156:   If m!=0, the only output to be used is the shift returned.
157: */
158:   return(0);
159: }

163: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *work)
164: {
165:   PetscInt       i;
166:   PetscReal      *a1;

169:   a1 = work;
170:   for (i=0;i<n;i++) a1[i] = a[i]-shift;
171:   *fail = 0;
172:   for (i=0;i<n-1;i++) {
173:     U[i] = a1[i];
174:     L[i] = b[i]/U[i];
175:     a1[i+1] = a1[i+1]-L[i];
176:   }
177:   U[n-1] = a1[n-1];

179:   /* Check if there are NaN values */
180:   for (i=0;i<n-1 && !*fail;i++) {
181:     if (PetscIsInfOrNanReal(L[i])) *fail=1;
182:     if (PetscIsInfOrNanReal(U[i])) *fail=1;
183:   }
184:   if (!*fail && PetscIsInfOrNanReal(U[n-1])) *fail=1;

186:   for (i=0;i<n-1 && !*fail;i++) {
187:     if (PetscAbsReal(L[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
188:     if (PetscAbsReal(U[i])>tol*norm) *fail = 1;
189:   }
190:   if (!*fail && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
191:   return(0);
192: }

196: static PetscErrorCode RealDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
197: {
198:   PetscReal d;
199:   PetscInt  i;

202:   *fail = 0;
203:   d = U[0]-shift;
204:   for (i=0;i<n-1;i++) {
205:     U1[i] = d+L[i];
206:     L1[i] = L[i]*(U[i+1]/U1[i]);
207:     d = d*(U[i+1]/U1[i])-shift;
208:   }
209:   U1[n-1]=d;

211:   /* The following demands IEEE arithmetic */
212:   for (i=0;i<n-1 && !*fail;i++) {
213:     if (PetscIsInfOrNanReal(L1[i])) *fail=1;
214:     if (PetscIsInfOrNanReal(U1[i])) *fail=1;
215:   }
216:   if (!*fail && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
217:   for (i=0;i<n-1 && !*fail;i++) {
218:     if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
219:     if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
220:   }
221:   if (!*fail && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
222:   return(0);
223: }

227: static PetscErrorCode TridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
228: {
229:   PetscReal xl,yl,xr,yr,zr;
230:   PetscInt  i;

233:   *fail = 0;
234:   xr = 1.0;
235:   yr = e[0];
236:   zr = 0.0;
237:   /* Step 1 */
238:   /* the efect of Z1 */
239:   xr = xr*q[0]+yr;
240:   /* the inverse of L1 */
241:   xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
242:   yl = -(q[2]*e[1]*q[1]*e[0])/xl;
243:   xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
244:   /* the efect of L1 */
245:   q[0] = xr-xl;
246:   xr = yr-xl;
247:   yr = zr-yl-xl*e[1];
248:   /*the inverse of Y1 */
249:   xr = xr/q[0];
250:   yr = yr/q[0];
251:   /*the effect of Y1 inverse */
252:   e[0] = xl+yr+xr*q[1];
253:   xl = yl+zr+yr*q[2];      /* zr=0  when n=3 */
254:   /*the effect of Y1 */
255:   xr = 1.0-xr;
256:   yr = e[1]-yr;

258:   /* STEP n-1 */

260:   if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
261:     /* the efect of Zn-1 */
262:     xr = xr*q[n-2]+yr;
263:     /* the inverse of Ln-1 */
264:     xl = -xl/e[n-3];
265:     /* the efect of Ln-1 */
266:     q[n-2] = xr-xl;
267:     xr = yr-xl;
268:     /*the inverse of Yn-1 */
269:     xr = xr/q[n-2];
270:     /*the effect of the inverse of Yn-1 */
271:     e[n-2] = xl+xr*q[n-1];
272:     /*the effects of Yn-1 */
273:     xr = 1.0-xr;
274:     /* STEP n */
275:     /*the effect of Zn */
276:     xr = xr*q[n-1];
277:     /*the inverse of Ln=I */
278:     /*the effect of Ln */
279:     q[n-1] = xr;
280:     /* the inverse of  Yn-1=I */

282:   } else { /* Free deflation */
283:     e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5;       /* Sum=trace/2 */
284:     q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl;             /* det */
285:     q[n-1] = e[n-2]*e[n-2]-q[n-2];
286:     *fail = 2;
287:   }

289:   /* The following demands IEEE arithmetic */
290:   for (i=0;i<n-1 && !*fail;i++) {
291:     if (PetscIsInfOrNanReal(e[i])) *fail=1;
292:     if (PetscIsInfOrNanReal(q[i])) *fail=1;
293:   }
294:   if (!*fail && PetscIsInfOrNanReal(q[n-1])) *fail=1;
295:   for (i=0;i<n-1 && !*fail;i++) {
296:     if (PetscAbsReal(e[i])>tol*norm) *fail=1;
297:     if (PetscAbsReal(q[i])>tol*norm) *fail=1;
298:   }
299:   if (!*fail && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
300:   return(0);
301: }

305: static PetscErrorCode TridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail)
306: {
308:   PetscInt       i;
309:   PetscReal      xl,yl,xr,yr,zr;

312:   for (i=0;i<n-1;i++) {
313:     e1[i] = e[i];
314:     q1[i] = q[i];
315:   }
316:   q1[n-1] = q[n-1];
317:   *fail = 0;
318:   if (n>3) {   /* For n>3 */
319:     *fail = 0;
320:     xr = 1;
321:     yr = e1[0];
322:     zr = 0;
323:     /* step 1 */
324:     /* the efect of Z1 */
325:     xr = xr*q1[0]+yr;
326:     /* the inverse of L1 */
327:     xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
328:     yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
329:     xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
330:     /* the efect of L1 */
331:     q1[0] = xr-xl;
332:     xr = yr-xl;
333:     yr = zr-yl-xl*e1[1];
334:     zr = -yl*e1[2];
335:     /* the inverse of Y1 */
336:     xr = xr/q1[0];
337:     yr = yr/q1[0];
338:     zr = zr/q1[0];
339:     /* the effect of Y1 inverse */
340:     e1[0] = xl+yr+xr*q1[1];
341:     xl = yl+zr+yr*q1[2];
342:     yl = zr*q1[3];
343:     /* the effect of Y1 */
344:     xr = 1-xr;
345:     yr = e1[1]-yr;
346:     zr = -zr;
347:     /* step i=2,...,n-3 */
348:     for (i=1;i<n-3;i++) {
349:       /* the efect of Zi */
350:       xr = xr*q1[i]+yr;
351:       /* the inverse of Li */
352:       xl = -xl/e1[i-1];
353:       yl = -yl/e1[i-1];
354:       /* the efect of Li */
355:       q1[i] = xr-xl;
356:       xr = yr-xl;
357:       yr = zr-yl-xl*e1[i+1];
358:       zr = -yl*e1[i+2];
359:       /* the inverse of Yi */
360:       xr = xr/q1[i];
361:       yr = yr/q1[i];
362:       zr = zr/q1[i];
363:       /* the effect of the inverse of Yi */
364:       e1[i] = xl+yr+xr*q1[i+1];
365:       xl = yl+zr+yr*q1[i+2];
366:       yl = zr*q1[i+3];
367:       /* the effects of Yi */
368:       xr = 1.0-xr;
369:       yr = e1[i+1]-yr;
370:       zr = -zr;
371:     }

373:     /* STEP n-2            zr is no longer needed */

375:     /* the efect of Zn-2 */
376:     xr = xr*q1[n-3]+yr;
377:     /* the inverse of Ln-2 */
378:     xl = -xl/e1[n-4];
379:     yl = -yl/e1[n-4];
380:     /* the efect of Ln-2 */
381:     q1[n-3] = xr-xl;
382:     xr = yr-xl;
383:     yr = zr-yl-xl*e1[n-2];
384:     /* the inverse of Yn-2 */
385:     xr = xr/q1[n-3];
386:     yr = yr/q1[n-3];
387:     /* the effect of the inverse of Yn-2 */
388:     e1[n-3] = xl+yr+xr*q1[n-2];
389:     xl = yl+yr*q1[n-1];
390:     /* the effect of Yn-2 */
391:     xr = 1.0-xr;
392:     yr = e1[n-2]-yr;

394:     /* STEP n-1           yl and yr are no longer needed */
395:     /* Testing for EARLY DEFLATION */

397:     if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
398:       /* the efect of Zn-1 */
399:       xr = xr*q1[n-2]+yr;
400:       /* the inverse of Ln-1 */
401:       xl = -xl/e1[n-3];
402:       /* the efect of Ln-1 */
403:       q1[n-2] = xr-xl;
404:       xr = yr-xl;
405:       /*the inverse of Yn-1 */
406:       xr = xr/q1[n-2];
407:       /*the effect of the inverse of Yn-1 */
408:       e1[n-2] = xl+xr*q1[n-1];
409:       /*the effects of Yn-1 */
410:       xr = 1.0-xr;

412:       /* STEP n;     xl no longer needed */
413:       /* the effect of Zn */
414:       xr = xr*q1[n-1];
415:       /* the inverse of Ln = I */
416:       /* the effect of Ln */
417:       q1[n-1] = xr;
418:       /* the inverse of  Yn-1=I */

420:     } else {  /* FREE DEFLATION */
421:       e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5;     /* sum=trace/2 */
422:       q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl;         /* det */
423:       q1[n-1] = e1[n-2]*e1[n-2]-q1[n-2];
424:       *fail = 2;
425:     }

427:     for (i=0;i<n-1 && !*fail;i++) {
428:       if (PetscIsInfOrNanReal(e1[i])) *fail=1;
429:       if (PetscIsInfOrNanReal(q1[i])) *fail=1;
430:     }
431:     if (!*fail && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
432:     for (i=0;i<n-1 && !*fail;i++) {
433:       if (PetscAbsReal(e1[i])>tol*norm) *fail = 1;  /* This demands IEEE arithmetic */
434:       if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
435:     }
436:     if (!*fail && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;

438:   } else {  /* The case n=3 */
439:     TridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
440:   }
441:   return(0);
442: }

446: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c,PetscScalar *wr,PetscScalar *wi,PetscReal *work)
447: {
448:   PetscInt       totalIt=0;       /* Total Number of Iterations  */
449:   PetscInt       totalFail=0;     /* Total number of failures */
450:   PetscInt       nFail=0;         /* Number of failures per transformation */
451:   PetscReal      tolZero=1.0/16;  /* Tolerance for zero shifts */
452:   PetscInt       maxIt=10*n;      /* Maximum number of iterations */
453:   PetscInt       maxFail=10*n;    /* Maximum number of failures allowed per each transformation */
454:   PetscReal      tolDef=PETSC_MACHINE_EPSILON;  /* Tolerance for deflation eps, 10*eps, 100*eps */
455:   PetscReal      tolGrowth=100000;
457:   PetscInt       i,k,nwu=0,begin,ind,flag,dim,m,*split,lastSplit;
458:   PetscReal      norm,gr,gl,sigma,delta,meanEig,*U,*L,*U1,*L1;
459:   PetscReal      acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
460:   PetscBool      test1,test2;

463:   dim = n;
464:   /* Test if the matrix is unreduced */
465:   for (i=0;i<n-1;i++) {
466:     if (PetscAbsReal(b[i])==0.0 || PetscAbsReal(c[i])==0.0) SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
467:   }
468:   U = work;
469:   L = work+n;
470:   U1 = work+2*n;
471:   L1 = work+3*n;
472:   nwu = 4*n;
473:   if (wi) {
474:     PetscMemzero(wi,n*sizeof(PetscScalar));
475:   }
476:   /* Normalization - the J form of C */
477:   for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */

479:   /* Scan matrix J  ---- Finding a box of inclusion for the eigenvalues */
480:   norm = 0.0;
481:   for (i=0;i<n-1;i++) {
482:     norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
483:   }
484:   norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
485:   ScanJ(n,a,b,&gl,&gr,&sigma);
486:   delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
487:   meanEig = 0.0;
488:   for (i=0;i<n;i++) meanEig += a[i];
489:   meanEig /= n; /* shift = initial shift = mean of eigenvalues */
490:   Prologue(n,a,b,gl,gr,&m,&shift,work+nwu);
491:   if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
492:     for (i=0;i<dim;i++) {
493:       wr[i] = shift;
494:       if (wi) wi[i] = 0.0;
495:     }
496:     return(0);
497:   }
498:   /* Initial LU Factorization */
499:   if (delta==0.0) shift=0.0;  /* The case when all eigenvalues are pure imaginary */
500:   LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu); /* flag=1 failure; flag=0 successful transformation*/
501:   while (flag==1 && nFail<maxFail) {
502:     shift=shift+delta;
503:     if (shift>gr || shift<gl) { /* Successive failures */
504:       shift=meanEig;
505:       delta=-delta;
506:     }
507:     nFail=nFail+1;
508:     LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu); /* flag=1 failure; flag=0 successful transformation*/
509:   }
510:   if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
511:   /* Successful Initial transformation */
512:   totalFail = totalFail+nFail;
513:   nFail = 0;
514:   acShift = 0;
515:   initialShift = shift;
516:   shift = 0;
517:   begin = 0;
518:   lastSplit = 0;
519:   PetscMalloc1(n,&split);
520:   split[lastSplit] = begin;
521:   while (begin!=-1) {
522:     while (n-begin>2 && totalIt<maxIt) {
523:       /* Check for deflation before performing a transformation */
524:       test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
525:             && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
526:             && PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])
527:             && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
528:       if (flag==2) {  /* Early 2x2 deflation */
529:         test2 = PETSC_TRUE;
530:       } else {
531:         if (n-begin>4) {
532:           test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
533:                && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
534:         } else { /* n-begin+1=3 */
535:           test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
536:         }
537:       }
538:       while (test2 || test1) {
539:         /* 2x2 deflation */
540:         if (test2) {
541:           if (flag==2) { /* Early deflation */
542:             sum = L[n-2];
543:             det = U[n-2];
544:             disc = U[n-1];
545:             flag = 0;
546:           } else {
547:             sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
548:             disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
549:             det = U[n-2]*U[n-1];
550:           }
551:           if (disc<=0) {
552: #if !defined(PETSC_USE_COMPLEX)
553:             wr[--n] = sum+acShift; if (wi) wi[n] = PetscSqrtReal(-disc);
554:             wr[--n] = sum+acShift; if (wi) wi[n] = -PetscSqrtReal(-disc);
555: #else
556:             wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
557:             wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
558: #endif
559:           } else {
560:             if (sum==0.0) {
561:               x1 = PetscSqrtReal(disc);
562:               x2 = -x1;
563:             } else {
564:               x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
565:               x2 = det/x1;
566:             }
567:             wr[--n] = x1+acShift;
568:             wr[--n] = x2+acShift;
569:           }
570:         } else { /* test1 -- 1x1 deflation */
571:           x1 = U[n-1]+acShift;
572:           wr[--n] = x1;
573:         }

575:         if (n<=begin+2) {
576:           break;
577:         } else {
578:           test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
579:                 && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
580:                 && PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])
581:                 && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
582:           if (n-begin>4) {
583:             test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
584:                   && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
585:           } else { /* n-begin+1=3 */
586:             test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
587:           }
588:         }
589:       } /* end "WHILE deflations" */
590:       /* After deflation */
591:       if (n>begin+3) {
592:         ind = begin;
593:         for (k=n-4;k>=begin+1;k--) {
594:           if (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k])
595:            && PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) {
596:             ind=k;
597:             break;
598:           }
599:         }
600:         if (ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin])) {
601:           lastSplit = lastSplit+1;
602:           split[lastSplit] = begin;
603:           L[ind] = acShift; /* Use of L[ind] to save acShift */
604:           begin = ind+1;
605:         }
606:       }

608:       if (n>begin+2) {
609:         disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
610:         if ((PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
611:           shift = 0;
612:           RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
613:           if (flag) {  /* Failure */
614:             TridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
615:             shift = 0.0;
616:             while (flag==1 && nFail<maxFail) {  /* Successive failures */
617:               shift = shift+delta;
618:               if (shift>gr-acShift || shift<gl-acShift) {
619:                 shift = meanEig-acShift;
620:                 delta = -delta;
621:               }
622:               nFail++;
623:               RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
624:             }
625:           }
626:         } else { /* L's are small */
627:           if (disc<0) {  /* disc <0   Complex case; Francis shift; 3dqds */
628:             sum = U[n-2]+L[n-2]+U[n-1];
629:             prod = U[n-2]*U[n-1];
630:             TridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
631:             shift = 0.0; /* Restoring transformation */
632:             while (flag==1 && nFail<maxFail) { /* In case of failure */
633:               shift = shift+U[n-1];  /* first time shift=0 */
634:               RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
635:               nFail++;
636:             }
637:           } else  { /* disc >0  Real case; real Wilkinson shift; dqds */
638:             sum = (L[n-2]+U[n-2]+U[n-1])/2;
639:             if (sum==0.0) {
640:               x1 = PetscSqrtReal(disc);
641:               x2 = -x1;
642:             } else {
643:               x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
644:               x2 = U[n-2]*U[n-1]/x1;
645:             }
646:             /* Take the eigenvalue closest to UL(n,n) */
647:             if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
648:               shift = x1;
649:             } else {
650:               shift = x2;
651:             }
652:             RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
653:             /* In case of failure */
654:             while (flag==1 && nFail<maxFail) {
655:               sum = 2*shift;
656:               prod = shift*shift;
657:               TridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
658:               /* In case of successive failures */
659:               if (shift==0.0) {
660:                 shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
661:               } else {
662:                 shift=shift+delta;
663:               }
664:               if (shift>gr-acShift || shift<gl-acShift) {
665:                 shift = meanEig-acShift;
666:                 delta = -delta;
667:               }
668:               if (!flag) { /* We changed from real dqds to 3dqds */
669:                 shift=0;
670:               }
671:               nFail++;
672:             }
673:           }
674:         } /* end "if tolZero" */
675:         if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
676:         /* Successful Transformation; flag==0 */
677:         totalIt++;
678:         acShift = shift+acShift;
679:         for (i=begin;i<n-1;i++) {
680:           L[i] = L1[i];
681:           U[i] = U1[i];
682:         }
683:         U[n-1] = U1[n-1];
684:         totalFail = totalFail+nFail;
685:         nFail = 0;
686:       }  /* end "if n>begin+1" */
687:     }  /* end WHILE 1 */
688:     if (totalIt>=maxIt) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
689:     /* END: n=2 or n=1  % n=begin+1 or n=begin */
690:     if (n==begin+2) {
691:       sum = (L[n-2]+U[n-2]+U[n-1])/2;
692:       disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
693:       if (disc<=0) {  /* Complex case */
694:         /* Deflation 2 */
695: #if !defined(PETSC_USE_COMPLEX)
696:         wr[--n] = sum+acShift; if (wi) wi[n] = PetscSqrtReal(-disc);
697:         wr[--n] = sum+acShift; if (wi) wi[n] = -PetscSqrtReal(-disc);
698: #else
699:         wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
700:         wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
701: #endif
702:       } else { /* Real case */
703:         if (sum==0.0) {
704:           x1 = PetscSqrtReal(disc);
705:           x2 = -x1;
706:         } else {
707:           x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
708:           x2 = U[n-2]*U[n-1]/x1;
709:         }
710:         /* Deflation 2 */
711:         wr[--n] = x2+acShift;
712:         wr[--n] = x1+acShift;
713:       }
714:     } else { /* n=1   n=begin */
715:       /* deflation 1 */
716:       x1 = U[n-1]+acShift;
717:       wr[--n] = x1;
718:     }
719:     switch (n) {
720:       case 0:
721:         begin = -1;
722:         break;
723:       case 1:
724:         acShift = L[begin-1];
725:         begin = split[lastSplit];
726:         lastSplit--;
727:         break;
728:       default : /* n>=2 */
729:         acShift = L[begin-1];
730:         begin = split[lastSplit];
731:         lastSplit--;
732:     }
733:   }/* While begin~=-1 */
734:   for (i=0;i<dim;i++) {
735:     wr[i] = wr[i]+initialShift;
736:   }
737:   PetscFree(split);
738:   return(0);
739: }

743: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
744: {
746:   PetscInt       i,off,ld,nwall,nwu;
747:   PetscScalar    *A,*B,*Q,*vi;
748:   PetscReal      *d,*e,*s,*a,*b,*c;

751: #if !defined(PETSC_USE_COMPLEX)
753: #endif
754:   ld = ds->ld;
755:   off = ds->l + ds->l*ld;
756:   A = ds->mat[DS_MAT_A];
757:   B = ds->mat[DS_MAT_B];
758:   Q = ds->mat[DS_MAT_Q];
759:   d = ds->rmat[DS_MAT_T];
760:   e = ds->rmat[DS_MAT_T] + ld;
761:   s = ds->rmat[DS_MAT_D];
762:   /* Quick return if possible */
763:   if (ds->n-ds->l == 1) {
764:     *(Q+off) = 1;
765:     if (!ds->compact) {
766:       d[ds->l] = PetscRealPart(A[off]);
767:       s[ds->l] = PetscRealPart(B[off]);
768:     }
769:     wr[ds->l] = d[ds->l]/s[ds->l];
770:     if (wi) wi[ds->l] = 0.0;
771:     return(0);
772:   }
773:   nwall = 12*ld+4;
774:   DSAllocateWork_Private(ds,0,nwall,0);
775:   /* Reduce to pseudotriadiagonal form */
776:   DSIntermediate_GHIEP(ds);

778:   /* Compute Eigenvalues (DQDS) */
779:   /* Form pseudosymmetric tridiagonal */
780:   a = ds->rwork;
781:   b = a+ld;
782:   c = b+ld;
783:   nwu = 3*ld;
784:   if (ds->compact) {
785:     for (i=ds->l;i<ds->n-1;i++) {
786:       a[i] = d[i]*s[i];
787:       b[i] = e[i]*s[i+1];
788:       c[i] = e[i]*s[i];
789:     }
790:     a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
791:   } else {
792:     for (i=ds->l;i<ds->n-1;i++) {
793:       a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
794:       b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
795:       c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
796:     }
797:     a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
798:   }
799:   vi = (wi)?wi+ds->l:NULL;
800:   DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu);

802:   /* Compute Eigenvectors with Inverse Iteration */
803:   DSGHIEPInverseIteration(ds,wr,wi);

805:   /* Recover eigenvalues from diagonal */
806:   DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
807: #if defined(PETSC_USE_COMPLEX)
808:   if (wi) {
809:     for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
810:   }
811: #endif
812:   return(0);
813: }