1: /*
2: BDC - Block-divide and conquer (see description in README file).
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/dsimpl.h>
25: #include <slepcblaslapack.h>
27: static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct, 28: PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut, 29: PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info) 30: {
31: /* -- Routine written in LAPACK Version 3.0 style -- */
32: /* *************************************************** */
33: /* Written by */
34: /* Michael Moldaschl and Wilfried Gansterer */
35: /* University of Vienna */
36: /* last modification: March 16, 2014 */
38: /* Small adaptations of original code written by */
39: /* Wilfried Gansterer and Bob Ward, */
40: /* Department of Computer Science, University of Tennessee */
41: /* see http://dx.doi.org/10.1137/S1064827501399432 */
42: /* *************************************************** */
44: /* Purpose */
45: /* ======= */
47: /* CUTLR computes the optimal cut in a sequence of BLKCT neighboring */
48: /* blocks whose sizes are given by the array BSIZES. */
49: /* The sum of all block sizes in the sequence considered is given by N. */
50: /* The cut is optimal in the sense that the difference of the sizes of */
51: /* the resulting two halves is minimum over all cuts with minimum ranks */
52: /* between blocks of the sequence considered. */
54: /* Arguments */
55: /* ========= */
57: /* START (input) INTEGER */
58: /* In the original array KSIZES of the calling routine DIBTDC, */
59: /* the position where the sequence considered in this routine starts. */
60: /* START >= 1. */
62: /* N (input) INTEGER */
63: /* The sum of all the block sizes of the sequence to be cut = */
64: /* = sum_{i=1}^{BLKCT} BSIZES( I ). */
65: /* N >= 3. */
67: /* BLKCT (input) INTEGER */
68: /* The number of blocks in the sequence to be cut. */
69: /* BLKCT >= 3. */
71: /* BSIZES (input) INTEGER array, dimension (BLKCT) */
72: /* The dimensions of the (quadratic) blocks of the sequence to be */
73: /* cut. sum_{i=1}^{BLKCT} BSIZES( I ) = N. */
75: /* RANKS (input) INTEGER array, dimension (BLKCT-1) */
76: /* The ranks determining the approximations of the off-diagonal */
77: /* blocks in the sequence considered. */
79: /* CUT (output) INTEGER */
80: /* After the optimum cut has been determined, the position (in the */
81: /* overall problem as worked on in DIBTDC !) of the last block in */
82: /* the first half of the sequence to be cut. */
83: /* START <= CUT <= START+BLKCT-2. */
85: /* LSUM (output) INTEGER */
86: /* After the optimum cut has been determined, the sum of the */
87: /* block sizes in the first half of the sequence to be cut. */
88: /* LSUM < N. */
90: /* LBLKS (output) INTEGER */
91: /* After the optimum cut has been determined, the number of the */
92: /* blocks in the first half of the sequence to be cut. */
93: /* 1 <= LBLKS < BLKCT. */
95: /* INFO (output) INTEGER */
96: /* = 0: successful exit. */
97: /* < 0: illegal arguments. */
98: /* if INFO = -i, the i-th (input) argument had an illegal */
99: /* value. */
100: /* > 0: illegal results. */
101: /* if INFO = i, the i-th (output) argument had an illegal */
102: /* value. */
104: /* Further Details */
105: /* =============== */
107: /* Based on code written by */
108: /* Wilfried Gansterer and Bob Ward, */
109: /* Department of Computer Science, University of Tennessee */
111: /* ===================================================================== */
113: PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum;
116: *info = 0;
117: *lblks = 1;
118: *lsum = 1;
119: *cut = start;
121: if (start < 1) {
122: *info = -1;
123: } else if (n < 3) {
124: *info = -2;
125: } else if (blkct < 3) {
126: *info = -3;
127: }
128: if (*info == 0) {
129: ksum = 0;
130: kchk = 0;
131: for (i = 0; i < blkct; ++i) {
132: ksk = bsizes[i];
133: ksum += ksk;
134: if (ksk < 1) kchk = 1;
135: }
136: if (ksum != n || kchk == 1) *info = -4;
137: }
138: if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in CUTLR",-(*info));
140: /* determine smallest rank in the range considered */
142: minrnk = n;
143: for (i = 0; i < blkct-1; ++i) {
144: if (ranks[i] < minrnk) minrnk = ranks[i];
145: }
147: /* determine best cut among those with smallest rank */
149: nhalf = n / 2;
150: tmpsum = 0;
151: mindev = n;
152: for (i = 0; i < blkct; ++i) {
153: tmpsum += bsizes[i];
154: if (ranks[i] == minrnk) {
156: /* determine deviation from "optimal" cut NHALF */
158: deviat = tmpsum - nhalf;
159: if (deviat<0) deviat = -deviat;
161: /* compare to best deviation so far */
163: if (deviat < mindev) {
164: mindev = deviat;
165: *cut = start + i;
166: *lblks = i + 1;
167: *lsum = tmpsum;
168: }
169: }
170: }
172: if (*cut < start || *cut >= start + blkct - 1) {
173: *info = 6;
174: } else if (*lsum < 1 || *lsum >= n) {
175: *info = 7;
176: } else if (*lblks < 1 || *lblks >= blkct) {
177: *info = 8;
178: }
179: return(0);
180: }
183: PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks, 184: PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d, 185: PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e,186: PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work, 187: PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,188: PetscBLASInt *info,PetscBLASInt jobz_len)189: {
190: /* -- Routine written in LAPACK Version 3.0 style -- */
191: /* *************************************************** */
192: /* Written by */
193: /* Michael Moldaschl and Wilfried Gansterer */
194: /* University of Vienna */
195: /* last modification: March 16, 2014 */
197: /* Small adaptations of original code written by */
198: /* Wilfried Gansterer and Bob Ward, */
199: /* Department of Computer Science, University of Tennessee */
200: /* see http://dx.doi.org/10.1137/S1064827501399432 */
201: /* *************************************************** */
203: /* Purpose */
204: /* ======= */
206: /* DIBTDC computes all eigenvalues and corresponding eigenvectors of a */
207: /* symmetric irreducible block tridiagonal matrix with rank RANK matrices */
208: /* as the subdiagonal blocks using a block divide and conquer method. */
210: /* Arguments */
211: /* ========= */
213: /* JOBZ (input) CHARACTER*1 */
214: /* = 'N': Compute eigenvalues only (not implemented); */
215: /* = 'D': Compute eigenvalues and eigenvectors. */
216: /* Eigenvectors are accumulated in the */
217: /* divide-and-conquer process. */
219: /* N (input) INTEGER */
220: /* The dimension of the symmetric irreducible block tridiagonal */
221: /* matrix. N >= 2. */
223: /* NBLKS (input) INTEGER, 2 <= NBLKS <= N */
224: /* The number of diagonal blocks in the matrix. */
226: /* KSIZES (input) INTEGER array, dimension (NBLKS) */
227: /* The dimension of the square diagonal blocks from top left */
228: /* to bottom right. KSIZES(I) >= 1 for all I, and the sum of */
229: /* KSIZES(I) for I = 1 to NBLKS has to be equal to N. */
231: /* D (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
232: /* The lower triangular elements of the symmetric diagonal */
233: /* blocks of the block tridiagonal matrix. Elements of the top */
234: /* left diagonal block, which is of dimension KSIZES(1), are */
235: /* contained in D(*,*,1); the elements of the next diagonal */
236: /* block, which is of dimension KSIZES(2), are contained in */
237: /* D(*,*,2); etc. */
239: /* L1D (input) INTEGER */
240: /* The leading dimension of the array D. L1D >= max(3,KMAX), */
241: /* where KMAX is the dimension of the largest diagonal block. */
243: /* L2D (input) INTEGER */
244: /* The second dimension of the array D. L2D >= max(3,KMAX), */
245: /* where KMAX is as stated in L1D above. */
247: /* E (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
248: /* Contains the elements of the scalars (singular values) and */
249: /* vectors (singular vectors) defining the rank RANK subdiagonal */
250: /* blocks of the matrix. */
251: /* E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */
252: /* E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */
253: /* E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */
254: /* subdiagonal block. */
256: /* RANK (input) INTEGER array, dimension (NBLKS-1). */
257: /* The ranks of all the subdiagonal blocks contained in the array E. */
258: /* RANK( K ) <= MIN( KSIZES( K ), KSIZES( K+1 ) ) */
260: /* L1E (input) INTEGER */
261: /* The leading dimension of the array E. L1E >= max(3,2*KMAX+1), */
262: /* where KMAX is as stated in L1D above. */
264: /* L2E (input) INTEGER */
265: /* The second dimension of the array E. L2E >= max(3,2*KMAX+1), */
266: /* where KMAX is as stated in L1D above. */
268: /* TOL (input) DOUBLE PRECISION, TOL <= 1.0D-1 */
269: /* User specified deflation tolerance for the routine DMERG2. */
270: /* If ( 1.0D-1 >= TOL >= 20*EPS ) then TOL is used as */
271: /* the deflation tolerance in DSRTDF. */
272: /* If ( TOL < 20*EPS ) then the standard deflation tolerance from */
273: /* LAPACK is used as the deflation tolerance in DSRTDF. */
275: /* EV (output) DOUBLE PRECISION array, dimension (N) */
276: /* If INFO = 0, then EV contains the eigenvalues of the */
277: /* symmetric block tridiagonal matrix in ascending order. */
279: /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */
280: /* On entry, Z will be the identity matrix. */
281: /* On exit, Z contains the eigenvectors of the block tridiagonal */
282: /* matrix. */
284: /* LDZ (input) INTEGER */
285: /* The leading dimension of the array Z. LDZ >= max(1,N). */
287: /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */
289: /* LWORK (input) INTEGER */
290: /* The dimension of the array WORK. */
291: /* In order to guarantee correct results in all cases, */
292: /* LWORK must be at least ( 2*N**2 + 3*N ). In many cases, */
293: /* less workspace is required. The absolute minimum required is */
294: /* ( N**2 + 3*N ). */
295: /* If the workspace provided is not sufficient, the routine will */
296: /* return a corresponding error code and report how much workspace */
297: /* was missing (see INFO). */
299: /* IWORK (workspace) INTEGER array, dimension (LIWORK) */
301: /* LIWORK (input) INTEGER */
302: /* The dimension of the array IWORK. */
303: /* LIWORK must be at least ( 5*N + 3 + 4*NBLKS - 4 ): */
304: /* 5*KMAX+3 for DSYEVD, 5*N for ????, */
305: /* 4*NBLKS-4 for the preprocessing (merging order) */
306: /* Summarizing, the minimum integer workspace needed is */
307: /* MAX( 5*N, 5*KMAX + 3 ) + 4*NBLKS - 4 */
309: /* INFO (output) INTEGER */
310: /* = 0: successful exit. */
311: /* < 0, > -99: illegal arguments. */
312: /* if INFO = -i, the i-th argument had an illegal value. */
313: /* = -99: error in the preprocessing (call of CUTLR). */
314: /* < -200: not enough workspace. Space for ABS(INFO + 200) */
315: /* numbers is required in addition to the workspace provided, */
316: /* otherwise some eigenvectors will be incorrect. */
317: /* > 0: The algorithm failed to compute an eigenvalue while */
318: /* working on the submatrix lying in rows and columns */
319: /* INFO/(N+1) through mod(INFO,N+1). */
321: /* Further Details */
322: /* =============== */
324: /* Based on code written by */
325: /* Wilfried Gansterer and Bob Ward, */
326: /* Department of Computer Science, University of Tennessee */
328: /* This routine is comparable to Dlaed0.f from LAPACK. */
330: /* ===================================================================== */
332: #if defined(SLEPC_MISSING_LAPACK_LACPY) || defined(PETSC_MISSING_LAPACK_SYEV)
334: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LACPY/SYEV - Lapack routine is unavailable");
335: #else
336: PetscBLASInt i, j, k, np, rp1, ksk, one=1;
337: PetscBLASInt cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum;
338: PetscBLASInt lblks, rblks, isize, lwmin, ilsum;
339: PetscBLASInt start, vstrt, istck1, istck2, istck3, merged;
340: PetscBLASInt liwmin, matsiz, startp, istrtp;
341: PetscReal rho, done=1.0, dmone=-1.0;
345: *info = 0;
347: if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') {
348: *info = -1;
349: } else if (n < 2) {
350: *info = -2;
351: } else if (nblks < 2 || nblks > n) {
352: *info = -3;
353: }
354: if (*info == 0) {
355: ksum = 0;
356: kmax = 0;
357: kchk = 0;
358: for (k = 0; k < nblks; ++k) {
359: ksk = ksizes[k];
360: ksum += ksk;
361: if (ksk > kmax) kmax = ksk;
362: if (ksk < 1) kchk = 1;
363: }
364: lwmin = n*n + n * 3;
365: liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4;
366: if (ksum != n || kchk == 1) {
367: *info = -4;
368: } else if (l1d < PetscMax(3,kmax)) {
369: *info = -6;
370: } else if (l2d < PetscMax(3,kmax)) {
371: *info = -7;
372: } else if (l1e < PetscMax(3,2*kmax + 1)) {
373: *info = -10;
374: } else if (l2e < PetscMax(3,2*kmax + 1)) {
375: *info = -11;
376: } else if (tol > .1) {
377: *info = -12;
378: } else if (ldz < PetscMax(1,n)) {
379: *info = -15;
380: } else if (lwork < lwmin) {
381: *info = -17;
382: } else if (liwork < liwmin) {
383: *info = -19;
384: }
385: }
386: if (*info == 0) {
387: for (k = 0; k < nblks-1; ++k) {
388: if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9;
389: }
390: }
392: if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DIBTDC",-(*info));
394: /* **************************************************************************** */
396: /* ...Preprocessing..................................................... */
397: /* Determine the optimal order for merging the subblocks and how much */
398: /* workspace will be needed for the merging (determined by the last */
399: /* merge). Cutpoints for the merging operations are determined and stored */
400: /* in reverse chronological order (starting with the final merging */
401: /* operation). */
402: 403: /* integer workspace requirements for the preprocessing: */
404: /* 4*(NBLKS-1) for merging history */
405: /* at most 3*(NBLKS-1) for stack */
407: start = 1;
408: size = n;
409: blks = nblks;
410: merged = 0;
411: k = 0;
413: /* integer workspace used for the stack is not needed any more after the */
414: /* preprocessing and therefore can use part of the 5*N */
415: /* integer workspace needed later on in the code */
417: istck1 = 0;
418: istck2 = istck1 + nblks;
419: istck3 = istck2 + nblks;
421: /* integer workspace used for storing the order of merges starts AFTER */
422: /* the integer workspace 5*N+3 which is needed later on in the code */
423: /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */
425: istrtp = n * 5 + 4;
426: icut = istrtp + nblks - 1;
427: isize = icut + nblks - 1;
428: ilsum = isize + nblks - 1;
430: L200:432: if (nblks >= 3) {
434: /* Determine the cut point. Note that in the routine CUTLR it is */
435: /* chosen such that it yields the best balanced merging operation */
436: /* among all the rank modifications with minimum rank. */
438: cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut,
439: &lsum, &lblks, info);
440: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in cutlr, info = %d",*info);
442: } else {
443: cut = 1;
444: lsum = ksizes[0];
445: lblks = 1;
446: }
448: ++merged;
449: startp = 0;
450: for (i = 0; i < start-1; ++i) startp += ksizes[i];
451: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
452: iwork[icut + (nblks - 1) - merged-1] = cut;
453: iwork[isize + (nblks - 1) - merged-1] = size;
454: iwork[ilsum + (nblks - 1) - merged-1] = lsum;
456: if (lblks == 2) {
458: /* one merge in left branch, left branch done */
459: ++merged;
460: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
461: iwork[icut + (nblks - 1) - merged-1] = start;
462: iwork[isize + (nblks - 1) - merged-1] = lsum;
463: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
464: }
466: if (lblks == 1 || lblks == 2) {
468: /* left branch done, continue on the right side */
469: start += lblks;
470: size -= lsum;
471: blks -= lblks;
473: if (blks <= 0) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in preprocessing, blks = %d",blks);
475: if (blks == 2) {
477: /* one merge in right branch, right branch done */
478: ++merged;
479: startp += lsum;
480: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
481: iwork[icut + (nblks - 1) - merged-1] = start;
482: iwork[isize + (nblks - 1) - merged-1] = size;
483: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
484: }
486: if (blks == 1 || blks == 2) {
488: /* get the next subproblem from the stack or finished */
490: if (k >= 1) {
492: /* something left on the stack */
493: start = iwork[istck1 + k-1];
494: size = iwork[istck2 + k-1];
495: blks = iwork[istck3 + k-1];
496: --k;
497: goto L200;
498: } else {
500: /* nothing left on the stack */
501: if (merged != nblks-1) SETERRQ(PETSC_COMM_SELF,1,"ERROR in preprocessing - not enough merges performed");
503: /* exit preprocessing */
505: }
506: } else {
508: /* BLKS.GE.3, and therefore analyze the right side */
510: goto L200;
511: }
512: } else {
514: /* LBLKS.GE.3, and therefore check the right side and */
515: /* put it on the stack if required */
517: rblks = blks - lblks;
518: if (rblks >= 3) {
519: ++k;
520: iwork[istck1 + k-1] = cut + 1;
521: iwork[istck2 + k-1] = size - lsum;
522: iwork[istck3 + k-1] = rblks;
523: } else if (rblks == 2) {
525: /* one merge in right branch, right branch done */
526: /* (note that nothing needs to be done if RBLKS.EQ.1 !) */
528: ++merged;
529: startp += lsum;
530: iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
531: iwork[icut + (nblks - 1) - merged-1] = start + lblks;
532: iwork[isize + (nblks - 1) - merged-1] = size - lsum;
533: iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1];
534: }
535: if (rblks <= 0) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: ERROR in preprocessing - rblks = %d",rblks);
537: /* continue on the left side */
539: size = lsum;
540: blks = lblks;
541: goto L200;
542: }
544: /* SIZE = IWORK( ISIZE+NBLKS-2 ) */
545: /* MAT1 = IWORK( ILSUM+NBLKS-2 ) */
546: 547: /* Note: after the dimensions SIZE and MAT1 of the last merging */
548: /* operation have been determined, an upper bound for the workspace */
549: /* requirements which is independent of how much deflation occurs in */
550: /* the last merging operation could be determined as follows */
551: /* (based on (3.15) and (3.19) from UT-CS-00-447): */
552: 553: /* IF( MAT1.LE.N/2 ) THEN */
554: /* WSPREQ = 3*N + 3/2*( SIZE-MAT1 )**2 + N*N/2 + MAT1*MAT1 */
555: /* ELSE */
556: /* WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + ( SIZE-MAT1 )**2 */
557: /* END IF */
558: 559: /* IF( LWORK-WSPREQ.LT.0 )THEN */
560: /* not enough work space provided */
561: /* INFO = -200 - ( WSPREQ-LWORK ) */
562: /* RETURN */
563: /* END IF */
564: /* However, this is not really useful, since the actual check whether */
565: /* enough workspace is provided happens in DMERG2.f ! */
567: /* ************************************************************************* */
569: /* ...Solve subproblems................................... */
570: 571: /* Divide the matrix into NBLKS submatrices using rank-r */
572: /* modifications (cuts) and solve for their eigenvalues and */
573: /* eigenvectors. Initialize index array to sort eigenvalues. */
574: 575: /* first block: ...................................... */
576: 577: /* correction for block 1: D1 - V1 \Sigma1 V1^T */
579: ksk = ksizes[0];
580: rp1 = rank[0];
582: /* initialize the proper part of Z with the diagonal block D1 */
583: /* (the correction will be made in Z and then the call of DSYEVD will */
584: /* overwrite it with the eigenvectors) */
586: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, d, &l1d, z, &ldz));
588: /* copy D1 into WORK (in order to be able to restore it afterwards) */
590: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, d, &l1d, work, &ksk));
592: /* copy V1 into the first RANK(1) columns of D1 and then */
593: /* multiply with \Sigma1 */
595: for (i = 0; i < rank[0]; ++i) {
596: PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one));
597: PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one));
598: }
600: /* multiply the first RANK( 1 ) columns of D1 with V1^T and */
601: /* subtract the result from the proper part of Z (previously */
602: /* initialized with D1) */
604: PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone,
605: d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz));
607: /* restore the original D1 from WORK */
609: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, d, &l1d));
611: /* eigenanalysis of block 1 (using DSYEVD) */
613: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info));
614: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block 1, info = %d",*info);
616: /* EV( 1: ) contains the eigenvalues in ascending order */
617: /* (they are returned this way by DSYEVD) */
619: for (i = 0; i < ksk; ++i) iwork[i] = i+1;
621: /* intermediate blocks: .............................. */
623: np = ksk;
625: /* remaining number of blocks */
627: if (nblks > 2) {
628: for (k = 1; k < nblks-1; ++k) {
630: /* correction for block K: */
631: /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */
633: ksk = ksizes[k];
634: rp1 = rank[k];
636: /* initialize the proper part of Z with the diagonal block Dk */
637: /* (the correction will be made in Z and then the call of DSYEVD will */
638: /* overwrite it with the eigenvectors) */
640: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[k*l1d*l2d], &l1d, &z[np+np*ldz], &ldz));
642: /* copy Dk into WORK (in order to be able to restore it afterwards) */
644: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[k*l1d*l2d], &l1d, work, &ksk));
646: /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */
647: /* multiply with \Sigma(K-1) */
649: for (i = 0; i < rank[k-1]; ++i) {
650: PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one));
651: PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one));
652: }
654: /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */
655: /* subtract the result from the proper part of Z (previously */
656: /* initialized with Dk) */
658: PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1],
659: &dmone, &d[k*l1d*l2d],
660: &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
662: /* copy Vk into the first RANK(K) columns of Dk and then */
663: /* multiply with \Sigmak */
665: for (i = 0; i < rank[k]; ++i) {
666: PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one));
667: PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one));
668: }
670: /* multiply the first RANK(K) columns of Dk with Vk^T and */
671: /* subtract the result from the proper part of Z (previously */
672: /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T] ) */
674: PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k],
675: &dmone, &d[k*l1d*l2d], &l1d,
676: &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz));
678: /* restore the original Dk from WORK */
680: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, &d[k*l1d*l2d], &l1d));
682: /* eigenanalysis of block K (using dsyevd) */
684: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz],
685: &ldz, &ev[np], work, &lwork, info));
686: if (*info) SETERRQ2(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block %d, info = %d",k,*info);
688: /* EV( NPP1: ) contains the eigenvalues in ascending order */
689: /* (they are returned this way by DSYEVD) */
691: for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
693: /* update NP */
694: np += ksk;
695: }
696: }
698: /* last block: ....................................... */
699: 700: /* correction for block NBLKS: */
701: /* D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */
703: ksk = ksizes[nblks-1];
705: /* initialize the proper part of Z with the diagonal block D(nblks) */
706: /* (the correction will be made in Z and then the call of DSYEVD will */
707: /* overwrite it with the eigenvectors) */
709: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[(nblks-1)*l1d*l2d], &l1d, &z[np+np*ldz], &ldz));
711: /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */
713: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[(nblks-1)*l1d*l2d], &l1d, work, &ksk));
715: /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */
716: /* multiply with \Sigma(nblks-1) */
718: for (i = 0; i < rank[nblks-2]; ++i) {
719: PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e],
720: &one, &d[(i + (nblks-1)*l2d)*l1d], &one));
721: PetscStackCallBLAS("BLASscal",BLASscal_(&ksk,
722: &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e],
723: &d[(i + (nblks-1)*l2d)*l1d], &one));
724: }
726: /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */
727: /* and subtract the result from the proper part of Z (previously */
728: /* initialized with D(nblks) ) */
730: PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2],
731: &dmone, &d[(nblks-1)*l1d*l2d], &l1d,
732: &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));
734: /* restore the original D(nblks) from WORK */
736: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, &d[(nblks-1)*l1d*l2d], &l1d));
738: /* eigenanalysis of block NBLKS (using dsyevd) */
740: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info));
741: if (*info) SETERRQ2(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block %d, info = %d",nblks,*info);
743: /* EV( NPP1: ) contains the eigenvalues in ascending order */
744: /* (they are returned this way by DSYEVD) */
746: for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;
748: /* note that from here on the entire workspace is available again */
751: /* Perform all the merging operations. */
753: vstrt = 0;
754: for (i = 0; i < nblks-1; ++i) {
756: /* MATSIZ = total size of the current rank RANK modification problem */
758: matsiz = iwork[isize + i - 1];
759: np = iwork[istrtp + i - 1];
760: kbrk = iwork[icut + i - 1];
761: mat1 = iwork[ilsum + i - 1];
762: vstrt += np;
764: for (j = 0; j < rank[kbrk-1]; ++j) {
766: /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */
767: /* (multiplied by 2) ! In order not to change the */
768: /* singular value stored in E( :, RANK( KBRK )+1, KBRK ), */
769: /* we do not pass on this variable as an argument to DMERG2, */
770: /* but we assign a separate variable RHO here which is passed */
771: /* on to DMERG2. */
772: /* Alternative solution in F90: */
773: /* pass E( :,RANK( KBRK )+1,KBRK ) to an INTENT( IN ) parameter */
774: /* in DMERG2. */
776: rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e];
778: /* eigenvectors are accumulated ( JOBZ.EQ.'D' ) */
780: BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz],
781: ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e],
782: ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e],
783: ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1);
784: 785: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in dmerg2, info = %d",*info);
786: }
788: /* at this point all RANK( KBRK ) rank-one modifications corresponding */
789: /* to the current off-diagonal block are finished. */
790: /* Move on to the next off-diagonal block. */
792: }
794: /* Re-merge the eigenvalues/vectors which were deflated at the final */
795: /* merging step by sorting all eigenvalues and eigenvectors according */
796: /* to the permutation stored in IWORK. */
797: 798: /* copy eigenvalues and eigenvectors in ordered form into WORK */
799: /* (eigenvalues into WORK( 1:N ), eigenvectors into WORK( N+1:N+1+N^2 ) ) */
801: for (i = 0; i < n; ++i) {
802: j = iwork[i];
803: work[i] = ev[j-1];
804: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one));
805: }
807: /* copy ordered eigenvalues back from WORK( 1:N ) into EV */
809: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one));
811: /* copy ordered eigenvectors back from WORK( N+1:N+1+N^2 ) into Z */
813: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &n, &work[n], &n, z, &ldz));
814: return(0);
815: #endif
816: }