Actual source code: dsnhep.c
slepc-3.13.0 2020-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: PetscFree(ds->perm);
22: PetscMalloc1(ld,&ds->perm);
23: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
24: return(0);
25: }
27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
28: {
29: PetscErrorCode ierr;
30: PetscViewerFormat format;
33: PetscViewerGetFormat(viewer,&format);
34: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
35: DSViewMat(ds,viewer,DS_MAT_A);
36: if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
37: if (ds->mat[DS_MAT_X]) { DSViewMat(ds,viewer,DS_MAT_X); }
38: if (ds->mat[DS_MAT_Y]) { DSViewMat(ds,viewer,DS_MAT_Y); }
39: return(0);
40: }
42: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
43: {
45: PetscInt i,j;
46: PetscBLASInt info,ld,n,n1,lwork,inc=1;
47: PetscScalar sdummy,done=1.0,zero=0.0;
48: PetscReal *sigma;
49: PetscBool iscomplex = PETSC_FALSE;
50: PetscScalar *A = ds->mat[DS_MAT_A];
51: PetscScalar *Q = ds->mat[DS_MAT_Q];
52: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
53: PetscScalar *W;
56: if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
57: PetscBLASIntCast(ds->n,&n);
58: PetscBLASIntCast(ds->ld,&ld);
59: n1 = n+1;
60: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
61: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
62: DSAllocateWork_Private(ds,5*ld,6*ld,0);
63: DSAllocateMat_Private(ds,DS_MAT_W);
64: W = ds->mat[DS_MAT_W];
65: lwork = 5*ld;
66: sigma = ds->rwork+5*ld;
68: /* build A-w*I in W */
69: for (j=0;j<n;j++)
70: for (i=0;i<=n;i++)
71: W[i+j*ld] = A[i+j*ld];
72: for (i=0;i<n;i++)
73: W[i+i*ld] -= A[(*k)+(*k)*ld];
75: /* compute SVD of W */
76: #if !defined(PETSC_USE_COMPLEX)
77: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
78: #else
79: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
80: #endif
81: SlepcCheckLapackInfo("gesvd",info);
83: /* the smallest singular value is the new error estimate */
84: if (rnorm) *rnorm = sigma[n-1];
86: /* update vector with right singular vector associated to smallest singular value,
87: accumulating the transformation matrix Q */
88: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
89: return(0);
90: }
92: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
93: {
95: PetscInt i;
98: for (i=0;i<ds->n;i++) {
99: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
100: }
101: return(0);
102: }
104: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
105: {
107: PetscInt i;
108: PetscBLASInt mm=1,mout,info,ld,n,*select,inc = 1;
109: PetscScalar tmp,done=1.0,zero=0.0;
110: PetscReal norm;
111: PetscBool iscomplex = PETSC_FALSE;
112: PetscScalar *A = ds->mat[DS_MAT_A];
113: PetscScalar *Q = ds->mat[DS_MAT_Q];
114: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
115: PetscScalar *Y;
118: PetscBLASIntCast(ds->n,&n);
119: PetscBLASIntCast(ds->ld,&ld);
120: DSAllocateWork_Private(ds,0,0,ld);
121: select = ds->iwork;
122: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
124: /* compute k-th eigenvector Y of A */
125: Y = X+(*k)*ld;
126: select[*k] = (PetscBLASInt)PETSC_TRUE;
127: #if !defined(PETSC_USE_COMPLEX)
128: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
129: mm = iscomplex? 2: 1;
130: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
131: DSAllocateWork_Private(ds,3*ld,0,0);
132: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
133: #else
134: DSAllocateWork_Private(ds,2*ld,ld,0);
135: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
136: #endif
137: SlepcCheckLapackInfo("trevc",info);
138: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
140: /* accumulate and normalize eigenvectors */
141: if (ds->state>=DS_STATE_CONDENSED) {
142: PetscArraycpy(ds->work,Y,mout*ld);
143: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
144: #if !defined(PETSC_USE_COMPLEX)
145: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
146: #endif
147: norm = BLASnrm2_(&n,Y,&inc);
148: #if !defined(PETSC_USE_COMPLEX)
149: if (iscomplex) {
150: tmp = BLASnrm2_(&n,Y+ld,&inc);
151: norm = SlepcAbsEigenvalue(norm,tmp);
152: }
153: #endif
154: tmp = 1.0 / norm;
155: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
156: #if !defined(PETSC_USE_COMPLEX)
157: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
158: #endif
159: }
161: /* set output arguments */
162: if (iscomplex) (*k)++;
163: if (rnorm) {
164: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
165: else *rnorm = PetscAbsScalar(Y[n-1]);
166: }
167: return(0);
168: }
170: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
171: {
173: PetscInt i;
174: PetscBLASInt n,ld,mout,info,inc = 1;
175: PetscBool iscomplex;
176: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
177: PetscReal norm;
178: const char *side,*back;
181: PetscBLASIntCast(ds->n,&n);
182: PetscBLASIntCast(ds->ld,&ld);
183: if (left) {
184: X = NULL;
185: Y = ds->mat[DS_MAT_Y];
186: side = "L";
187: } else {
188: X = ds->mat[DS_MAT_X];
189: Y = NULL;
190: side = "R";
191: }
192: Z = left? Y: X;
193: if (ds->state>=DS_STATE_CONDENSED) {
194: /* DSSolve() has been called, backtransform with matrix Q */
195: back = "B";
196: PetscArraycpy(Z,ds->mat[DS_MAT_Q],ld*ld);
197: } else back = "A";
198: #if !defined(PETSC_USE_COMPLEX)
199: DSAllocateWork_Private(ds,3*ld,0,0);
200: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
201: #else
202: DSAllocateWork_Private(ds,2*ld,ld,0);
203: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
204: #endif
205: SlepcCheckLapackInfo("trevc",info);
207: /* normalize eigenvectors */
208: for (i=0;i<n;i++) {
209: iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
210: norm = BLASnrm2_(&n,Z+i*ld,&inc);
211: #if !defined(PETSC_USE_COMPLEX)
212: if (iscomplex) {
213: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
214: norm = SlepcAbsEigenvalue(norm,tmp);
215: }
216: #endif
217: tmp = 1.0 / norm;
218: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
219: #if !defined(PETSC_USE_COMPLEX)
220: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
221: #endif
222: if (iscomplex) i++;
223: }
224: return(0);
225: }
227: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
228: {
232: switch (mat) {
233: case DS_MAT_X:
234: if (ds->refined) {
235: if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
236: if (j) {
237: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
238: } else {
239: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
240: }
241: } else {
242: if (j) {
243: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
244: } else {
245: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
246: }
247: }
248: break;
249: case DS_MAT_Y:
250: if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
251: if (j) {
252: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
253: } else {
254: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
255: }
256: break;
257: case DS_MAT_U:
258: case DS_MAT_VT:
259: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
260: break;
261: default:
262: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
263: }
264: if (ds->state < DS_STATE_CONDENSED) {
265: DSSetState(ds,DS_STATE_CONDENSED);
266: }
267: return(0);
268: }
270: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
271: {
273: PetscInt i;
274: PetscBLASInt info,n,ld,mout,lwork,*selection;
275: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
276: #if !defined(PETSC_USE_COMPLEX)
277: PetscBLASInt *iwork,liwork;
278: #endif
281: if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
282: PetscBLASIntCast(ds->n,&n);
283: PetscBLASIntCast(ds->ld,&ld);
284: #if !defined(PETSC_USE_COMPLEX)
285: lwork = n;
286: liwork = 1;
287: DSAllocateWork_Private(ds,lwork,0,liwork+n);
288: work = ds->work;
289: lwork = ds->lwork;
290: selection = ds->iwork;
291: iwork = ds->iwork + n;
292: liwork = ds->liwork - n;
293: #else
294: lwork = 1;
295: DSAllocateWork_Private(ds,lwork,0,n);
296: work = ds->work;
297: selection = ds->iwork;
298: #endif
299: /* Compute the selected eigenvalue to be in the leading position */
300: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
301: PetscArrayzero(selection,n);
302: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
303: #if !defined(PETSC_USE_COMPLEX)
304: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
305: #else
306: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
307: #endif
308: SlepcCheckLapackInfo("trsen",info);
309: *k = mout;
310: return(0);
311: }
313: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
314: {
316: PetscScalar re;
317: PetscInt i,j,pos,result;
318: PetscBLASInt ifst,ilst,info,n,ld;
319: PetscScalar *T = ds->mat[DS_MAT_A];
320: PetscScalar *Q = ds->mat[DS_MAT_Q];
321: #if !defined(PETSC_USE_COMPLEX)
322: PetscScalar *work,im;
323: #endif
326: PetscBLASIntCast(ds->n,&n);
327: PetscBLASIntCast(ds->ld,&ld);
328: #if !defined(PETSC_USE_COMPLEX)
329: DSAllocateWork_Private(ds,ld,0,0);
330: work = ds->work;
331: #endif
332: /* selection sort */
333: for (i=ds->l;i<n-1;i++) {
334: re = wr[i];
335: #if !defined(PETSC_USE_COMPLEX)
336: im = wi[i];
337: #endif
338: pos = 0;
339: j=i+1; /* j points to the next eigenvalue */
340: #if !defined(PETSC_USE_COMPLEX)
341: if (im != 0) j=i+2;
342: #endif
343: /* find minimum eigenvalue */
344: for (;j<n;j++) {
345: #if !defined(PETSC_USE_COMPLEX)
346: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
347: #else
348: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
349: #endif
350: if (result > 0) {
351: re = wr[j];
352: #if !defined(PETSC_USE_COMPLEX)
353: im = wi[j];
354: #endif
355: pos = j;
356: }
357: #if !defined(PETSC_USE_COMPLEX)
358: if (wi[j] != 0) j++;
359: #endif
360: }
361: if (pos) {
362: /* interchange blocks */
363: PetscBLASIntCast(pos+1,&ifst);
364: PetscBLASIntCast(i+1,&ilst);
365: #if !defined(PETSC_USE_COMPLEX)
366: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
367: #else
368: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
369: #endif
370: SlepcCheckLapackInfo("trexc",info);
371: /* recover original eigenvalues from T matrix */
372: for (j=i;j<n;j++) {
373: wr[j] = T[j+j*ld];
374: #if !defined(PETSC_USE_COMPLEX)
375: if (j<n-1 && T[j+1+j*ld] != 0.0) {
376: /* complex conjugate eigenvalue */
377: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
378: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
379: wr[j+1] = wr[j];
380: wi[j+1] = -wi[j];
381: j++;
382: } else {
383: wi[j] = 0.0;
384: }
385: #endif
386: }
387: }
388: #if !defined(PETSC_USE_COMPLEX)
389: if (wi[i] != 0) i++;
390: #endif
391: }
392: return(0);
393: }
395: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
396: {
400: if (!rr || wr == rr) {
401: DSSort_NHEP_Total(ds,wr,wi);
402: } else {
403: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
404: }
405: return(0);
406: }
408: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
409: {
411: PetscInt i,j,pos,inc=1;
412: PetscBLASInt ifst,ilst,info,n,ld;
413: PetscScalar *T = ds->mat[DS_MAT_A];
414: PetscScalar *Q = ds->mat[DS_MAT_Q];
415: #if !defined(PETSC_USE_COMPLEX)
416: PetscScalar *work;
417: #endif
420: PetscBLASIntCast(ds->n,&n);
421: PetscBLASIntCast(ds->ld,&ld);
422: #if !defined(PETSC_USE_COMPLEX)
423: DSAllocateWork_Private(ds,ld,0,0);
424: work = ds->work;
425: #endif
426: for (i=ds->l;i<n-1;i++) {
427: pos = perm[i];
428: #if !defined(PETSC_USE_COMPLEX)
429: inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
430: #endif
431: if (pos!=i) {
432: #if !defined(PETSC_USE_COMPLEX)
433: if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
434: SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
435: #endif
437: /* interchange blocks */
438: PetscBLASIntCast(pos+1,&ifst);
439: PetscBLASIntCast(i+1,&ilst);
440: #if !defined(PETSC_USE_COMPLEX)
441: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
442: #else
443: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
444: #endif
445: SlepcCheckLapackInfo("trexc",info);
446: for (j=i+1;j<n;j++) {
447: if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
448: }
449: perm[i] = i;
450: if (inc==2) perm[i+1] = i+1;
451: }
452: if (inc==2) i++;
453: }
454: /* recover original eigenvalues from T matrix */
455: for (j=ds->l;j<n;j++) {
456: wr[j] = T[j+j*ld];
457: #if !defined(PETSC_USE_COMPLEX)
458: if (j<n-1 && T[j+1+j*ld] != 0.0) {
459: /* complex conjugate eigenvalue */
460: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
461: wr[j+1] = wr[j];
462: wi[j+1] = -wi[j];
463: j++;
464: } else {
465: wi[j] = 0.0;
466: }
467: #endif
468: }
469: return(0);
470: }
472: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
473: {
475: PetscInt i;
476: PetscBLASInt n,ld,incx=1;
477: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
480: PetscBLASIntCast(ds->n,&n);
481: PetscBLASIntCast(ds->ld,&ld);
482: A = ds->mat[DS_MAT_A];
483: Q = ds->mat[DS_MAT_Q];
484: DSAllocateWork_Private(ds,2*ld,0,0);
485: x = ds->work;
486: y = ds->work+ld;
487: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
488: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
489: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
490: ds->k = n;
491: return(0);
492: }
494: /*
495: Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
496: The result overwrites A. Matrix A has the form
498: [ S | * ]
499: A = [-------]
500: [ R | H ]
502: where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
503: matrix of order n-k, and R is all zeros except the first row (the arrow).
504: The algorithm uses elementary reflectors to annihilate entries in the arrow, and
505: then proceeds upwards.
506: If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
507: hence the first ilo-1 rows and columns of Q are set to the identity matrix.
509: Required workspace is 2*n.
510: */
511: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
512: {
513: PetscBLASInt i,j,n0,m,inc=1,incn=-1;
514: PetscScalar t,*v=work,*w=work+n,tau,tauc;
517: m = n-ilo+1;
518: for (i=k;i>ilo;i--) {
519: for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda]; /* _larfg does not allow negative inc, so use buffer */
520: n0 = i-ilo+1;
521: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
522: for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
523: tauc = PetscConj(tau);
524: A[i+(i-1)*lda] = v[0];
525: v[0] = 1.0;
526: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
527: for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
528: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
529: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
530: }
531: /* trivial in-place transposition of Q */
532: for (j=ilo-1;j<k;j++) {
533: for (i=j;i<k;i++) {
534: t = Q[i+j*ldq];
535: if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
536: Q[j+i*ldq] = PetscConj(t);
537: }
538: }
539: return(0);
540: }
542: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
543: {
545: PetscScalar *work,*tau;
546: PetscInt i,j;
547: PetscBLASInt ilo,lwork,info,n,k,ld;
548: PetscScalar *A = ds->mat[DS_MAT_A];
549: PetscScalar *Q = ds->mat[DS_MAT_Q];
552: #if !defined(PETSC_USE_COMPLEX)
554: #endif
555: PetscBLASIntCast(ds->n,&n);
556: PetscBLASIntCast(ds->ld,&ld);
557: PetscBLASIntCast(ds->l+1,&ilo);
558: PetscBLASIntCast(ds->k,&k);
559: DSAllocateWork_Private(ds,ld+6*ld,0,0);
560: tau = ds->work;
561: work = ds->work+ld;
562: lwork = 6*ld;
564: /* initialize orthogonal matrix */
565: PetscArrayzero(Q,ld*ld);
566: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
567: if (n==1) { /* quick return */
568: wr[0] = A[0];
569: wi[0] = 0.0;
570: return(0);
571: }
573: /* reduce to upper Hessenberg form */
574: if (ds->state<DS_STATE_INTERMEDIATE) {
575: if (PETSC_FALSE && k>0) {
576: ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
577: } else {
578: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
579: SlepcCheckLapackInfo("gehrd",info);
580: for (j=0;j<n-1;j++) {
581: for (i=j+2;i<n;i++) {
582: Q[i+j*ld] = A[i+j*ld];
583: A[i+j*ld] = 0.0;
584: }
585: }
586: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
587: SlepcCheckLapackInfo("orghr",info);
588: }
589: }
591: /* compute the (real) Schur form */
592: #if !defined(PETSC_USE_COMPLEX)
593: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
594: for (j=0;j<ds->l;j++) {
595: if (j==n-1 || A[j+1+j*ld] == 0.0) {
596: /* real eigenvalue */
597: wr[j] = A[j+j*ld];
598: wi[j] = 0.0;
599: } else {
600: /* complex eigenvalue */
601: wr[j] = A[j+j*ld];
602: wr[j+1] = A[j+j*ld];
603: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
604: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
605: wi[j+1] = -wi[j];
606: j++;
607: }
608: }
609: #else
610: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
611: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
612: #endif
613: SlepcCheckLapackInfo("hseqr",info);
614: return(0);
615: }
617: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
618: {
620: PetscInt ld=ds->ld,l=ds->l,k;
621: PetscMPIInt n,rank,off=0,size,ldn;
624: k = (ds->n-l)*ld;
625: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
626: if (eigr) k += ds->n-l;
627: if (eigi) k += ds->n-l;
628: DSAllocateWork_Private(ds,k,0,0);
629: PetscMPIIntCast(k*sizeof(PetscScalar),&size);
630: PetscMPIIntCast(ds->n-l,&n);
631: PetscMPIIntCast(ld*(ds->n-l),&ldn);
632: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
633: if (!rank) {
634: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
635: if (ds->state>DS_STATE_RAW) {
636: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
637: }
638: if (eigr) {
639: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
640: }
641: if (eigi) {
642: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
643: }
644: }
645: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
646: if (rank) {
647: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
648: if (ds->state>DS_STATE_RAW) {
649: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
650: }
651: if (eigr) {
652: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
653: }
654: if (eigi) {
655: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
656: }
657: }
658: return(0);
659: }
661: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
662: {
663: PetscInt i,newn,ld=ds->ld,l=ds->l;
664: PetscScalar *A;
667: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
668: A = ds->mat[DS_MAT_A];
669: /* be careful not to break a diagonal 2x2 block */
670: if (A[n+(n-1)*ld]==0.0) newn = n;
671: else {
672: if (n<ds->n-1) newn = n+1;
673: else newn = n-1;
674: }
675: if (ds->extrarow && ds->k==ds->n) {
676: /* copy entries of extra row to the new position, then clean last row */
677: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
678: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
679: }
680: ds->k = 0;
681: ds->n = newn;
682: return(0);
683: }
685: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
686: {
688: PetscScalar *work;
689: PetscReal *rwork;
690: PetscBLASInt *ipiv;
691: PetscBLASInt lwork,info,n,ld;
692: PetscReal hn,hin;
693: PetscScalar *A;
696: PetscBLASIntCast(ds->n,&n);
697: PetscBLASIntCast(ds->ld,&ld);
698: lwork = 8*ld;
699: DSAllocateWork_Private(ds,lwork,ld,ld);
700: work = ds->work;
701: rwork = ds->rwork;
702: ipiv = ds->iwork;
704: /* use workspace matrix W to avoid overwriting A */
705: DSAllocateMat_Private(ds,DS_MAT_W);
706: A = ds->mat[DS_MAT_W];
707: PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);
709: /* norm of A */
710: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
711: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
713: /* norm of inv(A) */
714: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
715: SlepcCheckLapackInfo("getrf",info);
716: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
717: SlepcCheckLapackInfo("getri",info);
718: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
720: *cond = hn*hin;
721: return(0);
722: }
724: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
725: {
727: PetscInt i,j;
728: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
729: PetscScalar *A,*B,*Q,*g=gin,*ghat;
730: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
731: PetscReal gnorm;
734: PetscBLASIntCast(ds->n,&n);
735: PetscBLASIntCast(ds->ld,&ld);
736: A = ds->mat[DS_MAT_A];
738: if (!recover) {
740: DSAllocateWork_Private(ds,0,0,ld);
741: ipiv = ds->iwork;
742: if (!g) {
743: DSAllocateWork_Private(ds,ld,0,0);
744: g = ds->work;
745: }
746: /* use workspace matrix W to factor A-tau*eye(n) */
747: DSAllocateMat_Private(ds,DS_MAT_W);
748: B = ds->mat[DS_MAT_W];
749: PetscArraycpy(B,A,ld*ld);
751: /* Vector g initialy stores b = beta*e_n^T */
752: PetscArrayzero(g,n);
753: g[n-1] = beta;
755: /* g = (A-tau*eye(n))'\b */
756: for (i=0;i<n;i++) B[i+i*ld] -= tau;
757: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
758: SlepcCheckLapackInfo("getrf",info);
759: PetscLogFlops(2.0*n*n*n/3.0);
760: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
761: SlepcCheckLapackInfo("getrs",info);
762: PetscLogFlops(2.0*n*n-n);
764: /* A = A + g*b' */
765: for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;
767: } else { /* recover */
769: DSAllocateWork_Private(ds,ld,0,0);
770: ghat = ds->work;
771: Q = ds->mat[DS_MAT_Q];
773: /* g^ = -Q(:,idx)'*g */
774: PetscBLASIntCast(ds->l+ds->k,&ncol);
775: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
777: /* A = A + g^*b' */
778: for (i=0;i<ds->l+ds->k;i++)
779: for (j=ds->l;j<ds->l+ds->k;j++)
780: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
782: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
783: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
784: }
786: /* Compute gamma factor */
787: if (gamma) {
788: gnorm = 0.0;
789: for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
790: *gamma = PetscSqrtReal(1.0+gnorm);
791: }
792: return(0);
793: }
795: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
796: {
798: ds->ops->allocate = DSAllocate_NHEP;
799: ds->ops->view = DSView_NHEP;
800: ds->ops->vectors = DSVectors_NHEP;
801: ds->ops->solve[0] = DSSolve_NHEP;
802: ds->ops->sort = DSSort_NHEP;
803: ds->ops->sortperm = DSSortWithPermutation_NHEP;
804: ds->ops->synchronize = DSSynchronize_NHEP;
805: ds->ops->truncate = DSTruncate_NHEP;
806: ds->ops->update = DSUpdateExtraRow_NHEP;
807: ds->ops->cond = DSCond_NHEP;
808: ds->ops->transharm = DSTranslateHarmonic_NHEP;
809: return(0);
810: }