Actual source code: dsbtdc.c

slepc-3.7.1 2016-05-27
Report Typos and Errors
  1: /*
  2:    BDC - Block-divide and conquer (see description in README file).

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/dsimpl.h>
 25: #include <slepcblaslapack.h>

 27: PetscErrorCode BDC_dsbtdc_(const char *jobz,const char *jobacc,PetscBLASInt n, 
 28:         PetscBLASInt nblks,PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d, 
 29:         PetscBLASInt l2d,PetscReal *e,PetscBLASInt l1e,PetscBLASInt l2e,PetscReal tol,
 30:         PetscReal tau1,PetscReal tau2,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,
 31:         PetscReal *work,PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork, 
 32:         PetscReal *mingap,PetscBLASInt *mingapi,PetscBLASInt *info, 
 33:         PetscBLASInt jobz_len,PetscBLASInt jobacc_len)
 34: {
 35: /*  -- Routine written in LAPACK Version 3.0 style -- */
 36: /* *************************************************** */
 37: /*     Written by */
 38: /*     Michael Moldaschl and Wilfried Gansterer */
 39: /*     University of Vienna */
 40: /*     last modification: March 28, 2014 */

 42: /*     Small adaptations of original code written by */
 43: /*     Wilfried Gansterer and Bob Ward, */
 44: /*     Department of Computer Science, University of Tennessee */
 45: /*     see http://dx.doi.org/10.1137/S1064827501399432 */
 46: /* *************************************************** */

 48: /*  Purpose */
 49: /*  ======= */

 51: /*  DSBTDC computes approximations to all eigenvalues and eigenvectors */
 52: /*  of a symmetric block tridiagonal matrix using the divide and */
 53: /*  conquer method with lower rank approximations to the subdiagonal blocks. */

 55: /*  This code makes very mild assumptions about floating point */
 56: /*  arithmetic. It will work on machines with a guard digit in */
 57: /*  add/subtract, or on those binary machines without guard digits */
 58: /*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
 59: /*  It could conceivably fail on hexadecimal or decimal machines */
 60: /*  without guard digits, but we know of none.  See DLAED3M for details. */

 62: /*  Arguments */
 63: /*  ========= */

 65: /*  JOBZ    (input) CHARACTER*1 */
 66: /*          = 'N':  Compute eigenvalues only (not implemented); */
 67: /*          = 'D':  Compute eigenvalues and eigenvectors. Eigenvectors */
 68: /*                  are accumulated in the divide-and-conquer process. */

 70: /*  JOBACC  (input) CHARACTER*1 */
 71: /*          = 'A' ("automatic"): The accuracy parameters TAU1 and TAU2 */
 72: /*                               are determined automatically from the */
 73: /*                               parameter TOL according to the analytical */
 74: /*                               bounds. In that case the input values of */
 75: /*                               TAU1 and TAU2 are irrelevant (ignored). */
 76: /*          = 'M' ("manual"): The input values of the accuracy parameters */
 77: /*                            TAU1 and TAU2 are used. In that case the input */
 78: /*                            value of the parameter TOL is irrelevant */
 79: /*                            (ignored). */

 81: /*  N       (input) INTEGER */
 82: /*          The dimension of the symmetric block tridiagonal matrix. */
 83: /*          N >= 1. */

 85: /*  NBLKS   (input) INTEGER, 1 <= NBLKS <= N */
 86: /*          The number of diagonal blocks in the matrix. */

 88: /*  KSIZES  (input) INTEGER array, dimension (NBLKS) */
 89: /*          The dimensions of the square diagonal blocks from top left */
 90: /*          to bottom right.  KSIZES(I) >= 1 for all I, and the sum of */
 91: /*          KSIZES(I) for I = 1 to NBLKS has to be equal to N. */

 93: /*  D       (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
 94: /*          The lower triangular elements of the symmetric diagonal */
 95: /*          blocks of the block tridiagonal matrix. The elements of the top */
 96: /*          left diagonal block, which is of dimension KSIZES(1), have to */
 97: /*          be placed in D(*,*,1); the elements of the next diagonal */
 98: /*          block, which is of dimension KSIZES(2), have to be placed in */
 99: /*          D(*,*,2); etc. */

101: /*  L1D     (input) INTEGER */
102: /*          The leading dimension of the array D.  L1D >= max(3,KMAX), */
103: /*          where KMAX is the dimension of the largest diagonal block, */
104: /*          i.e.,  KMAX = max_I ( KSIZES(I) ). */

106: /*  L2D     (input) INTEGER */
107: /*          The second dimension of the array D.  L2D >= max(3,KMAX), */
108: /*          where KMAX is as stated in L1D above. */

110: /*  E       (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
111: /*          The elements of the subdiagonal blocks of the */
112: /*          block tridiagonal matrix. The elements of the top left */
113: /*          subdiagonal block, which is KSIZES(2) x KSIZES(1), have to be */
114: /*          placed in E(*,*,1); the elements of the next subdiagonal block, */
115: /*          which is KSIZES(3) x KSIZES(2), have to be placed in E(*,*,2); etc. */
116: /*          During runtime, the original contents of E(*,*,K) is */
117: /*          overwritten by the singular vectors and singular values of */
118: /*          the lower rank representation. */

120: /*  L1E     (input) INTEGER */
121: /*          The leading dimension of the array E.  L1E >= max(3,2*KMAX+1), */
122: /*          where KMAX is as stated in L1D above. The size of L1E enables */
123: /*          the storage of ALL singular vectors and singular values for */
124: /*          the corresponding off-diagonal block in E(*,*,K) and therefore */
125: /*          there are no restrictions on the rank of the approximation */
126: /*          (only the "natural" restriction */
127: /*          RANK( K ) .LE. MIN( KSIZES( K ),KSIZES( K+1 ) )). */

129: /*  L2E     (input) INTEGER */
130: /*          The second dimension of the array E.  L2E >= max(3,2*KMAX+1), */
131: /*          where KMAX is as stated in L1D above. The size of L2E enables */
132: /*          the storage of ALL singular vectors and singular values for */
133: /*          the corresponding off-diagonal block in E(*,*,K) and therefore */
134: /*          there are no restrictions on the rank of the approximation */
135: /*          (only the "natural" restriction */
136: /*          RANK( K ) .LE. MIN( KSIZES( K ),KSIZES( K+1 ) )). */

138: /*  TOL     (input) DOUBLE PRECISION, TOL.LE.TOLMAX */
139: /*          User specified tolerance for the residuals of the computed */
140: /*          eigenpairs. If ( JOBACC.EQ.'A' ) then it is used to determine */
141: /*          TAU1 and TAU2; ignored otherwise. */
142: /*          If ( TOL.LT.40*EPS .AND. JOBACC.EQ.'A' ) then TAU1 is set to machine */
143: /*          epsilon and TAU2 is set to the standard deflation tolerance from */
144: /*          LAPACK. */

146: /*  TAU1    (input) DOUBLE PRECISION, TAU1.LE.TOLMAX/2 */
147: /*          User specified tolerance for determining the rank of the */
148: /*          lower rank approximations to the off-diagonal blocks. */
149: /*          The rank for each off-diagonal block is determined such that */
150: /*          the resulting absolute eigenvalue error is less than or equal */
151: /*          to TAU1. */
152: /*          If ( JOBACC.EQ.'A' ) then TAU1 is determined automatically from */
153: /*             TOL and the input value is ignored. */
154: /*          If ( JOBACC.EQ.'M' .AND. TAU1.LT.20*EPS ) then TAU1 is set to */
155: /*             machine epsilon. */

157: /*  TAU2    (input) DOUBLE PRECISION, TAU2.LE.TOLMAX/2 */
158: /*          User specified deflation tolerance for the routine DIBTDC. */
159: /*          If ( 1.0D-1.GT.TAU2.GT.20*EPS ) then TAU2 is used as */
160: /*          the deflation tolerance in DSRTDF (EPS is the machine epsilon). */
161: /*          If ( TAU2.LE.20*EPS ) then the standard deflation tolerance from */
162: /*          LAPACK is used as the deflation tolerance in DSRTDF. */
163: /*          If ( JOBACC.EQ.'A' ) then TAU2 is determined automatically from */
164: /*             TOL and the input value is ignored. */
165: /*          If ( JOBACC.EQ.'M' .AND. TAU2.LT.20*EPS ) then TAU2 is set to */
166: /*             the standard deflation tolerance from LAPACK. */

168: /*  EV      (output) DOUBLE PRECISION array, dimension (N) */
169: /*          If INFO = 0, then EV contains the computed eigenvalues of the */
170: /*          symmetric block tridiagonal matrix in ascending order. */

172: /*  Z       (output) DOUBLE PRECISION array, dimension (LDZ,N) */
173: /*          If ( JOBZ.EQ.'D' .AND. INFO = 0 ) */
174: /*          then Z contains the orthonormal eigenvectors of the symmetric */
175: /*          block tridiagonal matrix computed by the routine DIBTDC */
176: /*          (accumulated in the divide-and-conquer process). */
177: /*          If ( -199 < INFO < -99 ) then Z contains the orthonormal */
178: /*          eigenvectors of the symmetric block tridiagonal matrix, */
179: /*          computed without divide-and-conquer (quick returns). */
180: /*          Otherwise not referenced. */

182: /*  LDZ     (input) INTEGER */
183: /*          The leading dimension of the array Z.  LDZ >= max(1,N). */

185: /*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */

187: /*  LWORK   (input) INTEGER */
188: /*          The dimension of the array WORK. */
189: /*          If NBLKS.EQ.1, then LWORK has to be at least 2N^2+6N+1 */
190: /*          (for the call of DSYEVD). */
191: /*          If NBLKS.GE.2 and ( JOBZ.EQ.'D' ) then the absolute minimum */
192: /*             required for DIBTDC is ( N**2 + 3*N ). This will not always */
193: /*             suffice, though, the routine will return a corresponding */
194: /*             error code and report how much work space was missing (see */
195: /*             INFO). */
196: /*          In order to guarantee correct results in all cases where */
197: /*          NBLKS.GE.2, LWORK must be at least ( 2*N**2 + 3*N ). */

199: /*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK) */

201: /*  LIWORK  (input) INTEGER */
202: /*          The dimension of the array IWORK. */
203: /*          LIWORK must be at least ( 5*N + 5*NBLKS - 1 ) (for DIBTDC) */
204: /*          Note that this should also suffice for the call of DSYEVD on a */
205: /*          diagonal block which requires ( 5*KMAX + 3 ). */


208: /*  MINGAP  (output) DOUBLE PRECISION */
209: /*          The minimum "gap" between the approximate eigenvalues */
210: /*          computed, i.e., MIN( ABS( EV( I+1 )-EV( I ) ) for I=1,2,..., N-1 */
211: /*          IF ( MINGAP.LE.TOL/10 ) THEN a warning flag is returned in INFO, */
212: /*          because the computed eigenvectors may be unreliable individually */
213: /*          (only the subspaces spanned are approximated reliably). */

215: /*  MINGAPI (output) INTEGER */
216: /*          Index I where the minimum gap in the spectrum occurred. */

218: /*  INFO    (output) INTEGER */
219: /*          = 0:  successful exit, no special cases occurred. */
220: /*          < -200: not enough workspace. Space for ABS(INFO + 200) */
221: /*                numbers is required in addition to the workspace provided, */
222: /*                otherwise some of the computed eigenvectors will be incorrect. */
223: /*          < -99, > -199: successful exit, but quick returns. */
224: /*                if INFO = -100, successful exit, but the input matrix */
225: /*                                was the zero matrix and no */
226: /*                                divide-and-conquer was performed */
227: /*                if INFO = -101, successful exit, but N was 1 and no */
228: /*                                divide-and-conquer was performed */
229: /*                if INFO = -102, successful exit, but only a single */
230: /*                                dense block. Standard dense solver */
231: /*                                was called, no divide-and-conquer was */
232: /*                                performed */
233: /*                if INFO = -103, successful exit, but warning that */
234: /*                                MINGAP.LE.TOL/10 and therefore the */
235: /*                                eigenvectors corresponding to close */
236: /*                                approximate eigenvalues may individually */
237: /*                                be unreliable (although taken together they */
238: /*                                do approximate the corresponding subspace to */
239: /*                                the desired accuracy) */
240: /*          = -99: error in the preprocessing in DIBTDC (when determining */
241: /*                 the merging order). */
242: /*          < 0, > -99: illegal arguments. */
243: /*                if INFO = -i, the i-th argument had an illegal value. */
244: /*          > 0:  The algorithm failed to compute an eigenvalue while */
245: /*                working on the submatrix lying in rows and columns */
246: /*                INFO/(N+1) through mod(INFO,N+1). */

248: /*  Further Details */
249: /*  =============== */

251: /*  Small modifications of code written by */
252: /*     Wilfried Gansterer and Bob Ward, */
253: /*     Department of Computer Science, University of Tennessee */
254: /*     see http://dx.doi.org/10.1137/S1064827501399432 */

256: /*  Based on the design of the LAPACK code sstedc.f written by Jeff */
257: /*  Rutter, Computer Science Division, University of California at */
258: /*  Berkeley, and modified by Francoise Tisseur, University of Tennessee. */

260: /*  ===================================================================== */

262: /*     .. Parameters .. */

264: #define TOLMAX 0.1

266: /*        TOLMAX       .... upper bound for tolerances TOL, TAU1, TAU2 */
267: /*                          NOTE: in the routine DIBTDC, the value */
268: /*                                1.D-1 is hardcoded for TOLMAX ! */

270: #if defined(SLEPC_MISSING_LAPACK_SYEVD) || defined(PETSC_MISSING_LAPACK_GESVD) || defined(SLEPC_MISSING_LAPACK_LASET) || defined(SLEPC_MISSING_LAPACK_LASCL)
272:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEVD/GESVD/LASET/LASCL - Lapack routine is unavailable");
273: #else
274:   PetscBLASInt   i, j, k, i1, iwspc, lwmin, start;
275:   PetscBLASInt   ii, ip, jp, nk, rk, np, iu, rp1, ldu;
276:   PetscBLASInt   ksk, ivt, iend, kchk, kmax, one=1, zero=0;
277:   PetscBLASInt   ldvt, ksum, kskp1, spneed, nrblks, liwmin, isvals;
278:   PetscReal      p, d2, eps, dmax, emax, done = 1.0, dzero = 0.0;
279:   PetscReal      dnrm, tiny, anorm, exdnrm=0, dropsv, absdiff;

283:   /* Determine machine epsilon. */
284:   eps = LAPACKlamch_("Epsilon");

286:   *info = 0;

288:   if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') {
289:     *info = -1;
290:   } else if (*(unsigned char *)jobacc != 'A' && *(unsigned char *)jobacc != 'M') {
291:     *info = -2;
292:   } else if (n < 1) {
293:     *info = -3;
294:   } else if (nblks < 1 || nblks > n) {
295:     *info = -4;
296:   }
297:   if (*info == 0) {
298:     ksum = 0;
299:     kmax = 0;
300:     kchk = 0;
301:     for (k = 0; k < nblks; ++k) {
302:       ksk = ksizes[k];
303:       ksum += ksk;
304:       if (ksk > kmax) kmax = ksk;
305:       if (ksk < 1) kchk = 1;
306:     }
307:     if (nblks == 1) lwmin = 2*n*n + n*6 + 1;
308:     else lwmin = n*n + n*3;
309:     liwmin = n * 5 + nblks * 5 - 4;
310:     if (ksum != n || kchk == 1) {
311:       *info = -5;
312:     } else if (l1d < PetscMax(3,kmax)) {
313:       *info = -7;
314:     } else if (l2d < PetscMax(3,kmax)) {
315:       *info = -8;
316:     } else if (l1e < PetscMax(3,2*kmax+1)) {
317:       *info = -10;
318:     } else if (l2e < PetscMax(3,2*kmax+1)) {
319:       *info = -11;
320:     } else if (*(unsigned char *)jobacc == 'A' && tol > TOLMAX) {
321:       *info = -12;
322:     } else if (*(unsigned char *)jobacc == 'M' && tau1 > TOLMAX/2) {
323:       *info = -13;
324:     } else if (*(unsigned char *)jobacc == 'M' && tau2 > TOLMAX/2) {
325:       *info = -14;
326:     } else if (ldz < PetscMax(1,n)) {
327:       *info = -17;
328:     } else if (lwork < lwmin) {
329:       *info = -19;
330:     } else if (liwork < liwmin) {
331:       *info = -21;
332:     }
333:   }

335:   if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DSBTDC",-(*info));

337:   /* Quick return if possible */

339:   if (n == 1) {
340:     ev[0] = d[0];
341:     z[0] = 1.;
342:     *info = -101;
343:     return(0);
344:   }

346:   /* If NBLKS is equal to 1, then solve the problem with standard */
347:   /* dense solver (in this case KSIZES(1) = N). */

349:   if (nblks == 1) {
350:     PetscPrintf(PETSC_COMM_WORLD," dsbtdc: This branch still needs to be checked!\n");
351:     for (i = 0; i < n; ++i) {
352:       for (j = 0; j <= i; ++j) {
353:         z[i + j*ldz] = d[i + j*l1d];
354:       }
355:     }
356:     PetscStackCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &n, z, &ldz, ev, work, &lwork, iwork, &liwork, info));
357:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DSYEVD, info = %d",*info);
358:     *info = -102;
359:     return(0);
360:   }

362:   /* determine the accuracy parameters (if requested) */

364:   if (*(unsigned char *)jobacc == 'A') {
365:     tau1 = tol / 2;
366:     if (tau1 < eps * 20) tau1 = eps;
367:     tau2 = tol / 2;
368:   }

370:   /* Initialize Z as the identity matrix */

372:   if (*(unsigned char *)jobz == 'D') {
373:     PetscStackCallBLAS("LAPACKlaset",LAPACKlaset_("Full", &n, &n, &dzero, &done, z, &ldz));
374:   }

376:   /* Determine the off-diagonal ranks, form and store the lower rank */
377:   /* approximations based on the tolerance parameters, the */
378:   /* RANK( K ) largest singular values and the associated singular */
379:   /* vectors of each subdiagonal block. Also find the maximum norm of */
380:   /* the subdiagonal blocks (in EMAX). */
381:   
382:   /* Compute SVDs of the subdiagonal blocks.... */

384:   /* EMAX .... maximum norm of the off-diagonal blocks */

386:   emax = 0.;
387:   for (k = 0; k < nblks-1; ++k) {
388:     ksk = ksizes[k];
389:     kskp1 = ksizes[k+1];
390:     isvals = 0;

392:     /* Note that min(KSKP1,KSK).LE.N/2 (equality possible for */
393:     /* NBLKS=2), and therefore storing the singular values requires */
394:     /* at most N/2 entries of the *        array WORK. */

396:     iu = isvals + n / 2;
397:     ivt = isvals + n / 2;

399:     /* Call of DGESVD: The space for U is not referenced, since */
400:     /* JOBU='O' and therefore this portion of the array WORK */
401:     /* is not referenced for U. */

403:     ldu = kskp1;
404:     ldvt = PetscMin(kskp1,ksk);
405:     iwspc = ivt + n * n / 2;

407:     /* Note that the minimum workspace required for this call */
408:     /* of DGESVD is: N/2 for storing the singular values + N**2/2 for */
409:     /* storing V^T + 5*N/2 workspace =  N**2/2 + 3*N. */

411:     i1 = lwork - iwspc;
412:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O", "S", &kskp1, &ksk,
413:             &e[k*l1e*l2e], &l1e, &work[isvals],
414:             &work[iu], &ldu, &work[ivt], &ldvt, &work[iwspc], &i1, info));
415:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DGESVD, info = %d",*info);

417:     /* Note that after the return from DGESVD U is stored in */
418:     /* E(*,*,K), and V^{\top} is stored in WORK( IVT, IVT+1, .... ) */

420:     /* determine the ranks RANK() for the approximations */

422:     rk = PetscMin(ksk,kskp1);
423: L8:
424:     dropsv = work[isvals - 1 + rk];

426:     if (dropsv * 2. <= tau1) {

428:       /* the error caused by dropping singular value RK is */
429:       /* small enough, try to reduce the rank by one more */

431:       --rk;
432:       if (rk > 0) goto L8;
433:       else iwork[k] = 0;
434:     } else {

436:       /* the error caused by dropping singular value RK is */
437:       /* too large already, RK is the rank required to achieve the */
438:       /* desired accuracy */

440:       iwork[k] = rk;
441:     }

443: /* ************************************************************************** */

445:     /* Store the first RANK( K ) terms of the SVD of the current */
446:     /* off-diagonal block. */
447:     /* NOTE that here it is required that L1E, L2E >= 2*KMAX+1 in order */
448:     /* to have enough space for storing singular vectors and values up */
449:     /* to the full SVD of an off-diagonal block !!!! */
450:    
451:     /* u1-u_RANK(K) is already contained in E(:,1:RANK(K),K) (as a */
452:     /* result of the call of DGESVD !), the sigma1-sigmaK are to be */
453:     /* stored in E(1:RANK(K),RANK(K)+1,K),  and v1-v_RANK(K) are to be */
454:     /* stored in E(:,RANK(K)+2:2*RANK(K)+1,K) */

456:     rp1 = iwork[k];
457:     for (j = 0; j < iwork[k]; ++j) {

459:       /* store sigma_J in E( J,RANK( K )+1,K ) */

461:       e[j + (rp1 + k*l2e)* l1e] = work[isvals + j];

463:       /* update maximum norm of subdiagonal blocks */

465:       if (e[j + (rp1 + k*l2e)*l1e] > emax) {
466:         emax = e[j + (rp1 + k*l2e)*l1e];
467:       }

469:       /* store v_J in E( :,RANK( K )+1+J,K ) */
470:       /* (note that WORK contains V^{\top} and therefore */
471:       /* we need to read rowwise !) */

473:       for (i = 1; i <= ksk; ++i) {
474:         e[i-1 + (rp1+j+1 + k*l2e)*l1e] = work[ivt+j + (i-1)*ldvt];
475:       }
476:     }

478:   }

480:   /* Compute the maximum norm of diagonal blocks and store the norm */
481:   /* of each diagonal block in E(RP1,RP1,K) (after the singular values); */
482:   /* store the norm of the last diagonal block in EXDNRM. */
483:   
484:   /* DMAX .... maximum one-norm of the diagonal blocks */

486:   dmax = 0.;
487:   for (k = 0; k < nblks; ++k) {
488:     rp1 = iwork[k];

490:     /* compute the one-norm of diagonal block K */

492:     dnrm = LAPACKlansy_("1", "L", &ksizes[k], &d[k*l1d*l2d], &l1d, work);
493:     if (k+1 == nblks) exdnrm = dnrm;
494:     else e[rp1 + (rp1 + k*l2e)*l1e] = dnrm;
495:     if (dnrm > dmax) dmax = dnrm;
496:   }

498:   /* Check for zero matrix. */

500:   if (emax == 0. && dmax == 0.) {
501:     for (i = 0; i < n; ++i) ev[i] = 0.;
502:     *info = -100;
503:     return(0);
504:   }

506: /* **************************************************************** */

508:   /* ....Identify irreducible parts of the block tridiagonal matrix */
509:   /* [while ( START <= NBLKS )].... */

511:   start = 0;
512:   np = 0;
513: L10:
514:   if (start < nblks) {

516:     /* Let IEND be the number of the next subdiagonal block such that */
517:     /* its RANK is 0 or IEND = NBLKS if no such subdiagonal exists. */
518:     /* The matrix identified by the elements between the diagonal block START */
519:     /* and the diagonal block IEND constitutes an independent (irreducible) */
520:     /* sub-problem. */

522:     iend = start;

524: L20:
525:     if (iend < nblks) {
526:       rk = iwork[iend];

528:       /* NOTE: if RANK( IEND ).EQ.0 then decoupling happens due to */
529:       /*       reduced accuracy requirements ! (because in this case */
530:       /*       we would not merge the corresponding two diagonal blocks) */
531:       
532:       /* NOTE: seems like any combination may potentially happen: */
533:       /*       (i) RANK = 0 but no decoupling due to small norm of */
534:       /*           off-diagonal block (corresponding diagonal blocks */
535:       /*           also have small norm) as well as */
536:       /*       (ii) RANK > 0 but decoupling due to small norm of */
537:       /*           off-diagonal block (corresponding diagonal blocks */
538:       /*           have very large norm) */
539:       /*       case (i) is ruled out by checking for RANK = 0 above */
540:       /*       (we decide to decouple all the time when the rank */
541:       /*       of an off-diagonal block is zero, independently of */
542:       /*       the norms of the corresponding diagonal blocks. */

544:       if (rk > 0) {

546:         /* check for decoupling due to small norm of off-diagonal block */
547:         /* (relative to the norms of the corresponding diagonal blocks) */

549:         if (iend == nblks-2) {
550:           d2 = PetscSqrtReal(exdnrm);
551:         } else {
552:           d2 = PetscSqrtReal(e[iwork[iend+1] + (iwork[iend+1] + (iend+1)*l2e)*l1e]);
553:         }

555:         /* this definition of TINY is analogous to the definition */
556:         /* in the tridiagonal divide&conquer (dstedc) */

558:         tiny = eps * PetscSqrtReal(e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e])*d2;
559:         if (e[(iwork[iend] + iend*l2e)*l1e] > tiny) {

561:           /* no decoupling due to small norm of off-diagonal block */

563:           ++iend;
564:           goto L20;
565:         }
566:       }
567:     }

569:     /* ....(Sub) Problem determined: between diagonal blocks */
570:     /*     START and IEND. Compute its size and solve it.... */

572:     nrblks = iend - start + 1;
573:     if (nrblks == 1) {

575:       /* Isolated problem is a single diagonal block */

577:       nk = ksizes[start];

579:       /* copy this isolated block into Z */

581:       for (i = 0; i < nk; ++i) {
582:         ip = np + i + 1;
583:         for (j = 0; j <= i; ++j) {
584:           jp = np + j + 1;
585:           z[ip + jp*ldz] = d[i + (j + start*l2d)*l1d];
586:         }
587:       }

589:       /* check whether there is enough workspace */

591:       spneed = 2*nk*nk + nk * 6 + 1;
592:       if (spneed > lwork) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: not enough workspace for DSYEVD, info = %d",lwork - 200 - spneed);

594:       PetscStackCallBLAS("LAPACKsyevd",LAPACKsyevd_("V", "L", &nk,
595:                     &z[np + np*ldz], &ldz, &ev[np],
596:                     work, &lwork, &iwork[nblks-1], &liwork, info));
597:       if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DSYEVD, info = %d",*info);
598:       start = iend + 1;
599:       np += nk;

601:       /* go to the next irreducible subproblem */

603:       goto L10;
604:     }

606:     /* ....Isolated problem consists of more than one diagonal block. */
607:     /*     Start the divide and conquer algorithm.... */
608:     
609:     /* Scale: Divide by the maximum of all norms of diagonal blocks */
610:     /*        and singular values of the subdiagonal blocks */
611:     
612:     /* ....determine maximum of the norms of all diagonal and subdiagonal */
613:     /*     blocks.... */

615:     if (iend == nblks-1) anorm = exdnrm;
616:     else anorm = e[iwork[iend] + (iwork[iend] + iend*l2e)*l1e];
617:     for (k = start; k < iend; ++k) {
618:       rp1 = iwork[k];

620:       /* norm of diagonal block */
621:       anorm = PetscMax(anorm,e[rp1 + (rp1 + k*l2e)*l1e]);

623:       /* singular value of subdiagonal block */
624:       anorm = PetscMax(anorm,e[(rp1 + k*l2e)*l1e]);
625:     }

627:     nk = 0;
628:     for (k = start; k < iend+1; ++k) {
629:       ksk = ksizes[k];
630:       nk += ksk;

632:       /* scale the diagonal block */
633:       PetscStackCallBLAS("LAPACKlascl",LAPACKlascl_("L", &zero, &zero,
634:                     &anorm, &done, &ksk, &ksk, &d[k*l2d*l1d], &l1d, info));
635:       if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DLASCL, info = %d",*info);

637:       /* scale the (approximated) off-diagonal block by dividing its */
638:       /* singular values */

640:       if (k != iend) {

642:         /* the last subdiagonal block has index IEND-1 !!!! */
643:         for (i = 0; i < iwork[k]; ++i) {
644:           e[i + (iwork[k] + k*l2e)*l1e] /= anorm;
645:         }
646:       }
647:     }

649:     /* call the block-tridiagonal divide-and-conquer on the */
650:     /* irreducible subproblem which has been identified */

652:     BDC_dibtdc_(jobz, nk, nrblks, &ksizes[start], &d[start*l1d*l2d], l1d, l2d,
653:                 &e[start*l2e*l1e], &iwork[start], l1e, l2e, tau2, &ev[np],
654:                 &z[np + np*ldz], ldz, work, lwork, &iwork[nblks-1], liwork, info, 1);
655:                 
656:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DIBTDC, info = %d",*info);

658: /* ************************************************************************** */

660:     /* Scale back the computed eigenvalues. */

662:     PetscStackCallBLAS("LAPACKlascl",LAPACKlascl_("G", &zero, &zero, &done,
663:             &anorm, &nk, &one, &ev[np], &nk, info));
664:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dsbtdc: Error in DLASCL, info = %d",*info);

666:     start = iend + 1;
667:     np += nk;

669:     /* Go to the next irreducible subproblem. */

671:     goto L10;
672:   }

674:   /* ....If the problem split any number of times, then the eigenvalues */
675:   /* will not be properly ordered. Here we permute the eigenvalues */
676:   /* (and the associated eigenvectors) across the irreducible parts */
677:   /* into ascending order.... */
678:   
679:   /*  IF( NRBLKS.LT.NBLKS )THEN */
680:   
681:   /*    Use Selection Sort to minimize swaps of eigenvectors */

683:   for (ii = 1; ii < n; ++ii) {
684:     i = ii;
685:     k = i;
686:     p = ev[i];
687:     for (j = ii; j < n; ++j) {
688:       if (ev[j] < p) {
689:         k = j;
690:         p = ev[j];
691:       }
692:     }
693:     if (k != i) {
694:       ev[k] = ev[i];
695:       ev[i] = p;
696:       PetscStackCallBLAS("BLASswap",BLASswap_(&n, &z[i*ldz], &one, &z[k*ldz], &one));
697:     }
698:   }

700:   /* ...Compute MINGAP (minimum difference between neighboring eigenvalue */
701:   /*    approximations).............................................. */

703:   *mingap = ev[1] - ev[0];
704:   if (*mingap < 0.) SETERRQ2(PETSC_COMM_SELF,1,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %d is larger than approximation %d.",1,2);
705:   *mingapi = 1;
706:   for (i = 2; i < n; ++i) {
707:     absdiff = ev[i] - ev[i-1];
708:     if (absdiff < 0.) {
709:       SETERRQ2(PETSC_COMM_SELF,1,"dsbtdc: Eigenvalue approximations are not ordered properly. Approximation %d is larger than approximation %d.",i,i+1);
710:     } else if (absdiff < *mingap) {
711:       *mingap = absdiff;
712:       *mingapi = i;
713:     }
714:   }

716:   /* check whether the minimum gap between eigenvalue approximations */
717:   /* may indicate severe inaccuracies in the eigenvector approximations */

719:   if (*mingap <= tol / 10) *info = -103;
720:   return(0);
721: #endif
722: }