Actual source code: dshep.c
slepc-3.7.0 2016-05-16
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc/private/dsimpl.h>
23: #include <slepcblaslapack.h>
27: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
28: {
32: DSAllocateMat_Private(ds,DS_MAT_A);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: DSAllocateMatReal_Private(ds,DS_MAT_T);
35: PetscFree(ds->perm);
36: PetscMalloc1(ld,&ds->perm);
37: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
38: return(0);
39: }
41: /* 0 l k n-1
42: -----------------------------------------
43: |* . . |
44: | * . . |
45: | * . . |
46: | * . . |
47: |. . . . o o |
48: | o o |
49: | o o |
50: | o o |
51: | o o |
52: | o o |
53: |. . . . o o o o o o o x |
54: | x x x |
55: | x x x |
56: | x x x |
57: | x x x |
58: | x x x |
59: | x x x |
60: | x x x |
61: | x x x|
62: | x x|
63: -----------------------------------------
64: */
68: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
69: {
71: PetscReal *T = ds->rmat[DS_MAT_T];
72: PetscScalar *A = ds->mat[DS_MAT_A];
73: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
76: if (tocompact) { /* switch from dense (arrow) to compact storage */
77: PetscMemzero(T,3*ld*sizeof(PetscReal));
78: for (i=0;i<k;i++) {
79: T[i] = PetscRealPart(A[i+i*ld]);
80: T[i+ld] = PetscRealPart(A[k+i*ld]);
81: }
82: for (i=k;i<n-1;i++) {
83: T[i] = PetscRealPart(A[i+i*ld]);
84: T[i+ld] = PetscRealPart(A[i+1+i*ld]);
85: }
86: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
87: if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
88: } else { /* switch from compact (arrow) to dense storage */
89: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
90: for (i=0;i<k;i++) {
91: A[i+i*ld] = T[i];
92: A[k+i*ld] = T[i+ld];
93: A[i+k*ld] = T[i+ld];
94: }
95: A[k+k*ld] = T[k];
96: for (i=k+1;i<n;i++) {
97: A[i+i*ld] = T[i];
98: A[i-1+i*ld] = T[i-1+ld];
99: A[i+(i-1)*ld] = T[i-1+ld];
100: }
101: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
102: }
103: return(0);
104: }
108: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
109: {
110: PetscErrorCode ierr;
111: PetscViewerFormat format;
112: PetscInt i,j,r,c,rows;
113: PetscReal value;
114: const char *methodname[] = {
115: "Implicit QR method (_steqr)",
116: "Relatively Robust Representations (_stevr)",
117: "Divide and Conquer method (_stedc)",
118: "Block Divide and Conquer method (dsbtdc)"
119: };
120: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
123: PetscViewerGetFormat(viewer,&format);
124: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
125: if (ds->bs>1) {
126: PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
127: }
128: if (ds->method>=nmeth) {
129: PetscViewerASCIIPrintf(viewer,"solving the problem with: INVALID METHOD\n");
130: } else {
131: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
132: }
133: return(0);
134: }
135: if (ds->compact) {
136: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
137: rows = ds->extrarow? ds->n+1: ds->n;
138: if (format == PETSC_VIEWER_ASCII_MATLAB) {
139: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
140: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
141: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
142: for (i=0;i<ds->n;i++) {
143: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
144: }
145: for (i=0;i<rows-1;i++) {
146: r = PetscMax(i+2,ds->k+1);
147: c = i+1;
148: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",r,c,*(ds->rmat[DS_MAT_T]+ds->ld+i));
149: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
150: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",c,r,*(ds->rmat[DS_MAT_T]+ds->ld+i));
151: }
152: }
153: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
154: } else {
155: for (i=0;i<rows;i++) {
156: for (j=0;j<ds->n;j++) {
157: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
158: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
159: else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
160: else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
161: else value = 0.0;
162: PetscViewerASCIIPrintf(viewer," %18.16e ",value);
163: }
164: PetscViewerASCIIPrintf(viewer,"\n");
165: }
166: }
167: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
168: PetscViewerFlush(viewer);
169: } else {
170: DSViewMat(ds,viewer,DS_MAT_A);
171: }
172: if (ds->state>DS_STATE_INTERMEDIATE) {
173: DSViewMat(ds,viewer,DS_MAT_Q);
174: }
175: return(0);
176: }
180: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
181: {
182: PetscScalar *Q = ds->mat[DS_MAT_Q];
183: PetscInt ld = ds->ld,i;
187: switch (mat) {
188: case DS_MAT_X:
189: case DS_MAT_Y:
190: if (j) {
191: if (ds->state>=DS_STATE_CONDENSED) {
192: PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
193: } else {
194: PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
195: *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
196: }
197: } else {
198: if (ds->state>=DS_STATE_CONDENSED) {
199: PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
200: } else {
201: PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
202: for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
203: }
204: }
205: if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
206: break;
207: case DS_MAT_U:
208: case DS_MAT_VT:
209: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
210: break;
211: default:
212: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
213: }
214: return(0);
215: }
219: PetscErrorCode DSNormalize_HEP(DS ds,DSMatType mat,PetscInt col)
220: {
222: switch (mat) {
223: case DS_MAT_X:
224: case DS_MAT_Y:
225: case DS_MAT_Q:
226: /* All the matrices resulting from DSVectors and DSSolve are already normalized */
227: break;
228: case DS_MAT_U:
229: case DS_MAT_VT:
230: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
231: break;
232: default:
233: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
234: }
235: return(0);
236: }
240: /*
241: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
243: [ d 0 0 0 e ]
244: [ 0 d 0 0 e ]
245: A = [ 0 0 d 0 e ]
246: [ 0 0 0 d e ]
247: [ e e e e d ]
249: to tridiagonal form
251: [ d e 0 0 0 ]
252: [ e d e 0 0 ]
253: T = Q'*A*Q = [ 0 e d e 0 ],
254: [ 0 0 e d e ]
255: [ 0 0 0 e d ]
257: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
258: perform the reduction, which requires O(n**2) flops. The accumulation
259: of the orthogonal factor Q, however, requires O(n**3) flops.
261: Arguments
262: =========
264: N (input) INTEGER
265: The order of the matrix A. N >= 0.
267: D (input/output) DOUBLE PRECISION array, dimension (N)
268: On entry, the diagonal entries of the matrix A to be
269: reduced.
270: On exit, the diagonal entries of the reduced matrix T.
272: E (input/output) DOUBLE PRECISION array, dimension (N-1)
273: On entry, the off-diagonal entries of the matrix A to be
274: reduced.
275: On exit, the subdiagonal entries of the reduced matrix T.
277: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
278: On exit, the orthogonal matrix Q.
280: LDQ (input) INTEGER
281: The leading dimension of the array Q.
283: Note
284: ====
285: Based on Fortran code contributed by Daniel Kressner
286: */
287: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
288: {
289: #if defined(SLEPC_MISSING_LAPACK_LARTG)
291: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
292: #else
293: PetscBLASInt i,j,j2,one=1;
294: PetscReal c,s,p,off,temp;
297: if (n<=2) return(0);
299: for (j=0;j<n-2;j++) {
301: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
302: temp = e[j+1];
303: PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&e[j],&c,&s,&e[j+1]));
304: s = -s;
306: /* Apply rotation to diagonal elements */
307: temp = d[j+1];
308: e[j] = c*s*(temp-d[j]);
309: d[j+1] = s*s*d[j] + c*c*temp;
310: d[j] = c*c*d[j] + s*s*temp;
312: /* Apply rotation to Q */
313: j2 = j+2;
314: PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
316: /* Chase newly introduced off-diagonal entry to the top left corner */
317: for (i=j-1;i>=0;i--) {
318: off = -s*e[i];
319: e[i] = c*e[i];
320: temp = e[i+1];
321: PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&off,&c,&s,&e[i+1]));
322: s = -s;
323: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
324: p = s*temp;
325: d[i+1] += p;
326: d[i] -= p;
327: e[i] = -e[i] - c*temp;
328: j2 = j+2;
329: PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
330: }
331: }
332: return(0);
333: #endif
334: }
338: /*
339: Reduce to tridiagonal form by means of ArrowTridiag.
340: */
341: static PetscErrorCode DSIntermediate_HEP(DS ds)
342: {
343: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
345: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
346: #else
348: PetscInt i;
349: PetscBLASInt n1,n2,n3,lwork,info,l,n,ld,off;
350: PetscScalar *A,*Q,*work,*tau;
351: PetscReal *d,*e;
354: PetscBLASIntCast(ds->n,&n);
355: PetscBLASIntCast(ds->l,&l);
356: PetscBLASIntCast(ds->ld,&ld);
357: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
358: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
359: n3 = n1+n2;
360: off = l+l*ld;
361: A = ds->mat[DS_MAT_A];
362: Q = ds->mat[DS_MAT_Q];
363: d = ds->rmat[DS_MAT_T];
364: e = ds->rmat[DS_MAT_T]+ld;
365: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
366: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
368: if (ds->compact) {
370: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
372: } else {
374: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
376: if (ds->state<DS_STATE_INTERMEDIATE) {
377: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
378: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
379: tau = ds->work;
380: work = ds->work+ld;
381: lwork = ld*ld;
382: PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
383: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSYTRD %d",info);
384: PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
385: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGTR %d",info);
386: } else {
387: /* copy tridiagonal to d,e */
388: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
389: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
390: }
391: }
392: return(0);
393: #endif
394: }
398: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
399: {
401: PetscInt n,l,i,*perm,ld=ds->ld;
402: PetscScalar *A;
403: PetscReal *d;
406: if (!ds->sc) return(0);
407: n = ds->n;
408: l = ds->l;
409: A = ds->mat[DS_MAT_A];
410: d = ds->rmat[DS_MAT_T];
411: perm = ds->perm;
412: if (!rr) {
413: DSSortEigenvaluesReal_Private(ds,d,perm);
414: } else {
415: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
416: }
417: for (i=l;i<n;i++) wr[i] = d[perm[i]];
418: DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
419: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
420: if (!ds->compact) {
421: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
422: }
423: return(0);
424: }
428: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
429: {
431: PetscInt i;
432: PetscBLASInt n,ld,incx=1;
433: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
434: PetscReal *e,beta;
437: PetscBLASIntCast(ds->n,&n);
438: PetscBLASIntCast(ds->ld,&ld);
439: A = ds->mat[DS_MAT_A];
440: Q = ds->mat[DS_MAT_Q];
441: e = ds->rmat[DS_MAT_T]+ld;
443: if (ds->compact) {
444: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
445: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
446: ds->k = n;
447: } else {
448: DSAllocateWork_Private(ds,2*ld,0,0);
449: x = ds->work;
450: y = ds->work+ld;
451: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
452: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
453: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
454: ds->k = n;
455: }
456: return(0);
457: }
461: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
462: {
463: #if defined(PETSC_MISSING_LAPACK_STEQR)
465: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
466: #else
468: PetscInt i;
469: PetscBLASInt n1,n2,n3,info,l,n,ld,off;
470: PetscScalar *Q,*A;
471: PetscReal *d,*e;
474: if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
475: PetscBLASIntCast(ds->n,&n);
476: PetscBLASIntCast(ds->l,&l);
477: PetscBLASIntCast(ds->ld,&ld);
478: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
479: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
480: n3 = n1+n2;
481: off = l+l*ld;
482: Q = ds->mat[DS_MAT_Q];
483: A = ds->mat[DS_MAT_A];
484: d = ds->rmat[DS_MAT_T];
485: e = ds->rmat[DS_MAT_T]+ld;
487: /* Reduce to tridiagonal form */
488: DSIntermediate_HEP(ds);
490: /* Solve the tridiagonal eigenproblem */
491: for (i=0;i<l;i++) wr[i] = d[i];
493: DSAllocateWork_Private(ds,0,2*ld,0);
494: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
495: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEQR %d",info);
496: for (i=l;i<n;i++) wr[i] = d[i];
498: /* Create diagonal matrix as a result */
499: if (ds->compact) {
500: PetscMemzero(e,(n-1)*sizeof(PetscReal));
501: } else {
502: for (i=l;i<n;i++) {
503: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
504: }
505: for (i=l;i<n;i++) A[i+i*ld] = d[i];
506: }
508: /* Set zero wi */
509: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
510: return(0);
511: #endif
512: }
516: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
517: {
518: #if defined(SLEPC_MISSING_LAPACK_STEVR)
520: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
521: #else
523: PetscInt i;
524: PetscBLASInt n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
525: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
526: PetscReal *d,*e,abstol=0.0,vl,vu;
527: #if defined(PETSC_USE_COMPLEX)
528: PetscInt j;
529: PetscReal *ritz;
530: #endif
533: if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
534: PetscBLASIntCast(ds->n,&n);
535: PetscBLASIntCast(ds->l,&l);
536: PetscBLASIntCast(ds->ld,&ld);
537: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
538: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
539: n3 = n1+n2;
540: off = l+l*ld;
541: A = ds->mat[DS_MAT_A];
542: Q = ds->mat[DS_MAT_Q];
543: d = ds->rmat[DS_MAT_T];
544: e = ds->rmat[DS_MAT_T]+ld;
546: /* Reduce to tridiagonal form */
547: DSIntermediate_HEP(ds);
549: /* Solve the tridiagonal eigenproblem */
550: for (i=0;i<l;i++) wr[i] = d[i];
552: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
553: DSAllocateMat_Private(ds,DS_MAT_W);
554: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
555: W = ds->mat[DS_MAT_W];
556: }
557: #if defined(PETSC_USE_COMPLEX)
558: DSAllocateMatReal_Private(ds,DS_MAT_Q);
559: #endif
560: lwork = 20*ld;
561: liwork = 10*ld;
562: DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
563: isuppz = ds->iwork+liwork;
564: #if defined(PETSC_USE_COMPLEX)
565: ritz = ds->rwork+lwork;
566: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
567: for (i=l;i<n;i++) wr[i] = ritz[i];
568: #else
569: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
570: #endif
571: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack DSTEVR %d",info);
572: #if defined(PETSC_USE_COMPLEX)
573: for (i=l;i<n;i++)
574: for (j=l;j<n;j++)
575: Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
576: #endif
577: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
578: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
579: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
580: }
581: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
583: /* Create diagonal matrix as a result */
584: if (ds->compact) {
585: PetscMemzero(e,(n-1)*sizeof(PetscReal));
586: } else {
587: for (i=l;i<n;i++) {
588: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
589: }
590: for (i=l;i<n;i++) A[i+i*ld] = d[i];
591: }
593: /* Set zero wi */
594: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
595: return(0);
596: #endif
597: }
601: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
602: {
603: #if defined(SLEPC_MISSING_LAPACK_STEDC)
605: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
606: #else
608: PetscInt i;
609: PetscBLASInt n1,info,l,ld,off,lrwork,liwork;
610: PetscScalar *Q,*A;
611: PetscReal *d,*e;
612: #if defined(PETSC_USE_COMPLEX)
613: PetscBLASInt lwork;
614: PetscInt j;
615: #endif
618: if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
619: PetscBLASIntCast(ds->l,&l);
620: PetscBLASIntCast(ds->ld,&ld);
621: PetscBLASIntCast(ds->n-ds->l,&n1);
622: off = l+l*ld;
623: Q = ds->mat[DS_MAT_Q];
624: A = ds->mat[DS_MAT_A];
625: d = ds->rmat[DS_MAT_T];
626: e = ds->rmat[DS_MAT_T]+ld;
628: /* Reduce to tridiagonal form */
629: DSIntermediate_HEP(ds);
631: /* Solve the tridiagonal eigenproblem */
632: for (i=0;i<l;i++) wr[i] = d[i];
634: lrwork = 5*n1*n1+3*n1+1;
635: liwork = 5*n1*n1+6*n1+6;
636: #if !defined(PETSC_USE_COMPLEX)
637: DSAllocateWork_Private(ds,0,lrwork,liwork);
638: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
639: #else
640: lwork = ld*ld;
641: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
642: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
643: /* Fixing Lapack bug*/
644: for (j=ds->l;j<ds->n;j++)
645: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
646: #endif
647: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEDC %d",info);
648: for (i=l;i<ds->n;i++) wr[i] = d[i];
650: /* Create diagonal matrix as a result */
651: if (ds->compact) {
652: PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
653: } else {
654: for (i=l;i<ds->n;i++) {
655: PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
656: }
657: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
658: }
660: /* Set zero wi */
661: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
662: return(0);
663: #endif
664: }
666: #if !defined(PETSC_USE_COMPLEX)
669: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
670: {
672: PetscBLASInt i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
673: PetscScalar *Q,*A;
674: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
677: if (ds->l>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for l>1");
678: if (ds->compact) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for compact storage");
679: PetscBLASIntCast(ds->ld,&ld);
680: PetscBLASIntCast(ds->bs,&bs);
681: PetscBLASIntCast(ds->n,&n);
682: nblks = n/bs;
683: Q = ds->mat[DS_MAT_Q];
684: A = ds->mat[DS_MAT_A];
685: d = ds->rmat[DS_MAT_T];
686: e = ds->rmat[DS_MAT_T]+ld;
687: lrwork = 4*n*n+60*n+1;
688: liwork = 5*n+5*nblks-1;
689: lde = 2*bs+1;
690: DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
691: D = ds->work;
692: E = ds->work+bs*n;
693: rwork = ds->rwork;
694: ksizes = ds->iwork;
695: iwork = ds->iwork+nblks;
696: PetscMemzero(iwork,liwork*sizeof(PetscBLASInt));
698: /* Copy matrix to block tridiagonal format */
699: j=0;
700: for (i=0;i<nblks;i++) {
701: ksizes[i]=bs;
702: for (k=0;k<bs;k++)
703: for (m=0;m<bs;m++)
704: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
705: j = j + bs;
706: }
707: j=0;
708: for (i=0;i<nblks-1;i++) {
709: for (k=0;k<bs;k++)
710: for (m=0;m<bs;m++)
711: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
712: j = j + bs;
713: }
715: /* Solve the block tridiagonal eigenproblem */
716: BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
717: Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
718: for (i=0;i<ds->n;i++) wr[i] = d[i];
720: /* Create diagonal matrix as a result */
721: if (ds->compact) {
722: PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
723: } else {
724: for (i=0;i<ds->n;i++) {
725: PetscMemzero(A+i*ld,ds->n*sizeof(PetscScalar));
726: }
727: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
728: }
730: /* Set zero wi */
731: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
732: return(0);
733: }
734: #endif
738: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
739: {
740: PetscInt i,ld=ds->ld,l=ds->l;
741: PetscScalar *A;
744: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
745: A = ds->mat[DS_MAT_A];
746: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
747: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
748: }
749: if (ds->extrarow) ds->k = n;
750: else ds->k = 0;
751: ds->n = n;
752: return(0);
753: }
757: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
758: {
759: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
761: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
762: #else
764: PetscScalar *work;
765: PetscReal *rwork;
766: PetscBLASInt *ipiv;
767: PetscBLASInt lwork,info,n,ld;
768: PetscReal hn,hin;
769: PetscScalar *A;
772: PetscBLASIntCast(ds->n,&n);
773: PetscBLASIntCast(ds->ld,&ld);
774: lwork = 8*ld;
775: DSAllocateWork_Private(ds,lwork,ld,ld);
776: work = ds->work;
777: rwork = ds->rwork;
778: ipiv = ds->iwork;
779: DSSwitchFormat_HEP(ds,PETSC_FALSE);
781: /* use workspace matrix W to avoid overwriting A */
782: DSAllocateMat_Private(ds,DS_MAT_W);
783: A = ds->mat[DS_MAT_W];
784: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
786: /* norm of A */
787: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
789: /* norm of inv(A) */
790: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
791: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
792: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
793: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
794: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
796: *cond = hn*hin;
797: return(0);
798: #endif
799: }
803: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
804: {
805: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
807: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
808: #else
810: PetscInt i,j,k=ds->k;
811: PetscScalar *Q,*A,*R,*tau,*work;
812: PetscBLASInt ld,n1,n0,lwork,info;
815: PetscBLASIntCast(ds->ld,&ld);
816: DSAllocateWork_Private(ds,ld*ld,0,0);
817: tau = ds->work;
818: work = ds->work+ld;
819: PetscBLASIntCast(ld*(ld-1),&lwork);
820: DSAllocateMat_Private(ds,DS_MAT_W);
821: A = ds->mat[DS_MAT_A];
822: Q = ds->mat[DS_MAT_Q];
823: R = ds->mat[DS_MAT_W];
824: /* Copy I+alpha*A */
825: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
826: PetscMemzero(R,ld*ld*sizeof(PetscScalar));
827: for (i=0;i<k;i++) {
828: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
829: Q[k+i*ld] = alpha*A[k+i*ld];
830: }
831: /* Compute qr */
832: PetscBLASIntCast(k+1,&n1);
833: PetscBLASIntCast(k,&n0);
834: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
835: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
836: /* Copy R from Q */
837: for (j=0;j<k;j++)
838: for (i=0;i<=j;i++)
839: R[i+j*ld] = Q[i+j*ld];
840: /* Compute orthogonal matrix in Q */
841: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
842: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
843: /* Compute the updated matrix of projected problem */
844: for (j=0;j<k;j++)
845: for (i=0;i<k+1;i++)
846: A[j*ld+i] = Q[i*ld+j];
847: alpha = -1.0/alpha;
848: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
849: for (i=0;i<k;i++)
850: A[ld*i+i]-=alpha;
851: return(0);
852: #endif
853: }
857: PETSC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
858: {
860: ds->ops->allocate = DSAllocate_HEP;
861: ds->ops->view = DSView_HEP;
862: ds->ops->vectors = DSVectors_HEP;
863: ds->ops->solve[0] = DSSolve_HEP_QR;
864: ds->ops->solve[1] = DSSolve_HEP_MRRR;
865: ds->ops->solve[2] = DSSolve_HEP_DC;
866: #if !defined(PETSC_USE_COMPLEX)
867: ds->ops->solve[3] = DSSolve_HEP_BDC;
868: #endif
869: ds->ops->sort = DSSort_HEP;
870: ds->ops->truncate = DSTruncate_HEP;
871: ds->ops->update = DSUpdateExtraRow_HEP;
872: ds->ops->cond = DSCond_HEP;
873: ds->ops->transrks = DSTranslateRKS_HEP;
874: ds->ops->normalize = DSNormalize_HEP;
875: return(0);
876: }