Actual source code: dsnhep.c
slepc-3.7.0 2016-05-16
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc/private/dsimpl.h>
23: #include <slepcblaslapack.h>
27: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
28: {
32: DSAllocateMat_Private(ds,DS_MAT_A);
33: DSAllocateMat_Private(ds,DS_MAT_Q);
34: PetscFree(ds->perm);
35: PetscMalloc1(ld,&ds->perm);
36: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
37: return(0);
38: }
42: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
43: {
47: DSViewMat(ds,viewer,DS_MAT_A);
48: if (ds->state>DS_STATE_INTERMEDIATE) {
49: DSViewMat(ds,viewer,DS_MAT_Q);
50: }
51: if (ds->mat[DS_MAT_X]) {
52: DSViewMat(ds,viewer,DS_MAT_X);
53: }
54: if (ds->mat[DS_MAT_Y]) {
55: DSViewMat(ds,viewer,DS_MAT_Y);
56: }
57: return(0);
58: }
62: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
63: {
64: #if defined(PETSC_MISSING_LAPACK_GESVD)
66: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
67: #else
69: PetscInt i,j;
70: PetscBLASInt info,ld,n,n1,lwork,inc=1;
71: PetscScalar sdummy,done=1.0,zero=0.0;
72: PetscReal *sigma;
73: PetscBool iscomplex = PETSC_FALSE;
74: PetscScalar *A = ds->mat[DS_MAT_A];
75: PetscScalar *Q = ds->mat[DS_MAT_Q];
76: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
77: PetscScalar *W;
80: if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors");
81: PetscBLASIntCast(ds->n,&n);
82: PetscBLASIntCast(ds->ld,&ld);
83: n1 = n+1;
84: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
85: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
86: DSAllocateWork_Private(ds,5*ld,6*ld,0);
87: DSAllocateMat_Private(ds,DS_MAT_W);
88: W = ds->mat[DS_MAT_W];
89: lwork = 5*ld;
90: sigma = ds->rwork+5*ld;
92: /* build A-w*I in W */
93: for (j=0;j<n;j++)
94: for (i=0;i<=n;i++)
95: W[i+j*ld] = A[i+j*ld];
96: for (i=0;i<n;i++)
97: W[i+i*ld] -= A[(*k)+(*k)*ld];
99: /* compute SVD of W */
100: #if !defined(PETSC_USE_COMPLEX)
101: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
102: #else
103: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
104: #endif
105: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);
107: /* the smallest singular value is the new error estimate */
108: if (rnorm) *rnorm = sigma[n-1];
110: /* update vector with right singular vector associated to smallest singular value,
111: accumulating the transformation matrix Q */
112: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
113: return(0);
114: #endif
115: }
119: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
120: {
122: PetscInt i;
125: for (i=0;i<ds->n;i++) {
126: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
127: }
128: return(0);
129: }
133: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
134: {
135: #if defined(SLEPC_MISSING_LAPACK_TREVC)
137: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
138: #else
140: PetscInt i;
141: PetscBLASInt mm=1,mout,info,ld,n,*select,inc = 1;
142: PetscScalar tmp,done=1.0,zero=0.0;
143: PetscReal norm;
144: PetscBool iscomplex = PETSC_FALSE;
145: PetscScalar *A = ds->mat[DS_MAT_A];
146: PetscScalar *Q = ds->mat[DS_MAT_Q];
147: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
148: PetscScalar *Y;
151: PetscBLASIntCast(ds->n,&n);
152: PetscBLASIntCast(ds->ld,&ld);
153: DSAllocateWork_Private(ds,0,0,ld);
154: select = ds->iwork;
155: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
157: /* compute k-th eigenvector Y of A */
158: Y = X+(*k)*ld;
159: select[*k] = (PetscBLASInt)PETSC_TRUE;
160: #if !defined(PETSC_USE_COMPLEX)
161: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
162: mm = iscomplex? 2: 1;
163: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
164: DSAllocateWork_Private(ds,3*ld,0,0);
165: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
166: #else
167: DSAllocateWork_Private(ds,2*ld,ld,0);
168: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
169: #endif
170: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
171: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
173: /* accumulate and normalize eigenvectors */
174: if (ds->state>=DS_STATE_CONDENSED) {
175: PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
176: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
177: #if !defined(PETSC_USE_COMPLEX)
178: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
179: #endif
180: norm = BLASnrm2_(&n,Y,&inc);
181: #if !defined(PETSC_USE_COMPLEX)
182: if (iscomplex) {
183: tmp = BLASnrm2_(&n,Y+ld,&inc);
184: norm = SlepcAbsEigenvalue(norm,tmp);
185: }
186: #endif
187: tmp = 1.0 / norm;
188: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
189: #if !defined(PETSC_USE_COMPLEX)
190: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
191: #endif
192: }
194: /* set output arguments */
195: if (iscomplex) (*k)++;
196: if (rnorm) {
197: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
198: else *rnorm = PetscAbsScalar(Y[n-1]);
199: }
200: return(0);
201: #endif
202: }
206: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
207: {
208: #if defined(SLEPC_MISSING_LAPACK_TREVC)
210: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
211: #else
213: PetscInt i;
214: PetscBLASInt n,ld,mout,info,inc = 1;
215: PetscBool iscomplex = PETSC_FALSE;
216: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
217: PetscReal norm;
218: const char *side,*back;
221: PetscBLASIntCast(ds->n,&n);
222: PetscBLASIntCast(ds->ld,&ld);
223: if (left) {
224: X = NULL;
225: Y = ds->mat[DS_MAT_Y];
226: side = "L";
227: } else {
228: X = ds->mat[DS_MAT_X];
229: Y = NULL;
230: side = "R";
231: }
232: Z = left? Y: X;
233: if (ds->state>=DS_STATE_CONDENSED) {
234: /* DSSolve() has been called, backtransform with matrix Q */
235: back = "B";
236: PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
237: } else back = "A";
238: #if !defined(PETSC_USE_COMPLEX)
239: DSAllocateWork_Private(ds,3*ld,0,0);
240: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
241: #else
242: DSAllocateWork_Private(ds,2*ld,ld,0);
243: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
244: #endif
245: if (info) SETERRQ1(PetscObjectComm((PetscObject)ds),PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
247: /* normalize eigenvectors */
248: for (i=0;i<n;i++) {
249: if (i<n-1 && A[i+1+i*ld]!=0.0) iscomplex = PETSC_TRUE;
250: norm = BLASnrm2_(&n,Z+i*ld,&inc);
251: #if !defined(PETSC_USE_COMPLEX)
252: if (iscomplex) {
253: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
254: norm = SlepcAbsEigenvalue(norm,tmp);
255: }
256: #endif
257: tmp = 1.0 / norm;
258: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
259: #if !defined(PETSC_USE_COMPLEX)
260: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
261: #endif
262: if (iscomplex) i++;
263: }
264: return(0);
265: #endif
266: }
270: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
271: {
275: switch (mat) {
276: case DS_MAT_X:
277: if (ds->refined) {
278: if (!ds->extrarow) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Refined vectors require activating the extra row");
279: if (j) {
280: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
281: } else {
282: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
283: }
284: } else {
285: if (j) {
286: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
287: } else {
288: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
289: }
290: }
291: break;
292: case DS_MAT_Y:
293: if (ds->refined) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
294: if (j) {
295: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
296: } else {
297: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
298: }
299: break;
300: case DS_MAT_U:
301: case DS_MAT_VT:
302: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
303: break;
304: default:
305: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
306: }
307: if (ds->state < DS_STATE_CONDENSED) {
308: DSSetState(ds,DS_STATE_CONDENSED);
309: }
310: return(0);
311: }
315: PetscErrorCode DSNormalize_NHEP(DS ds,DSMatType mat,PetscInt col)
316: {
318: PetscInt i,i0,i1;
319: PetscBLASInt ld,n,one = 1;
320: PetscScalar *A = ds->mat[DS_MAT_A],norm,*x;
321: #if !defined(PETSC_USE_COMPLEX)
322: PetscScalar norm0;
323: #endif
326: switch (mat) {
327: case DS_MAT_X:
328: case DS_MAT_Y:
329: case DS_MAT_Q:
330: /* Supported matrices */
331: break;
332: case DS_MAT_U:
333: case DS_MAT_VT:
334: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
335: break;
336: default:
337: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
338: }
340: PetscBLASIntCast(ds->n,&n);
341: PetscBLASIntCast(ds->ld,&ld);
342: DSGetArray(ds,mat,&x);
343: if (col < 0) {
344: i0 = 0; i1 = ds->n;
345: } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
346: i0 = col-1; i1 = col+1;
347: } else {
348: i0 = col; i1 = col+1;
349: }
350: for (i=i0;i<i1;i++) {
351: #if !defined(PETSC_USE_COMPLEX)
352: if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
353: norm = BLASnrm2_(&n,&x[ld*i],&one);
354: norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
355: norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
356: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
357: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*(i+1)],&one));
358: i++;
359: } else
360: #endif
361: {
362: norm = BLASnrm2_(&n,&x[ld*i],&one);
363: norm = 1.0/norm;
364: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
365: }
366: }
367: return(0);
368: }
372: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
373: {
374: #if defined(PETSC_MISSING_LAPACK_TRSEN)
376: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
377: #else
379: PetscInt i;
380: PetscBLASInt info,n,ld,mout,lwork,*selection;
381: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
382: #if !defined(PETSC_USE_COMPLEX)
383: PetscBLASInt *iwork,liwork;
384: #endif
387: if (!k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must supply argument k");
388: PetscBLASIntCast(ds->n,&n);
389: PetscBLASIntCast(ds->ld,&ld);
390: #if !defined(PETSC_USE_COMPLEX)
391: lwork = n;
392: liwork = 1;
393: DSAllocateWork_Private(ds,lwork,0,liwork+n);
394: work = ds->work;
395: lwork = ds->lwork;
396: selection = ds->iwork;
397: iwork = ds->iwork + n;
398: liwork = ds->liwork - n;
399: #else
400: lwork = 1;
401: DSAllocateWork_Private(ds,lwork,0,n);
402: work = ds->work;
403: selection = ds->iwork;
404: #endif
405: /* Compute the selected eigenvalue to be in the leading position */
406: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
407: PetscMemzero(selection,n*sizeof(PetscBLASInt));
408: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
409: #if !defined(PETSC_USE_COMPLEX)
410: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
411: #else
412: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
413: #endif
414: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSEN %d",info);
415: *k = mout;
416: return(0);
417: #endif
418: }
422: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
423: {
424: #if defined(SLEPC_MISSING_LAPACK_TREXC)
426: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
427: #else
429: PetscScalar re;
430: PetscInt i,j,pos,result;
431: PetscBLASInt ifst,ilst,info,n,ld;
432: PetscScalar *T = ds->mat[DS_MAT_A];
433: PetscScalar *Q = ds->mat[DS_MAT_Q];
434: #if !defined(PETSC_USE_COMPLEX)
435: PetscScalar *work,im;
436: #endif
439: PetscBLASIntCast(ds->n,&n);
440: PetscBLASIntCast(ds->ld,&ld);
441: #if !defined(PETSC_USE_COMPLEX)
442: DSAllocateWork_Private(ds,ld,0,0);
443: work = ds->work;
444: #endif
445: /* selection sort */
446: for (i=ds->l;i<n-1;i++) {
447: re = wr[i];
448: #if !defined(PETSC_USE_COMPLEX)
449: im = wi[i];
450: #endif
451: pos = 0;
452: j=i+1; /* j points to the next eigenvalue */
453: #if !defined(PETSC_USE_COMPLEX)
454: if (im != 0) j=i+2;
455: #endif
456: /* find minimum eigenvalue */
457: for (;j<n;j++) {
458: #if !defined(PETSC_USE_COMPLEX)
459: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
460: #else
461: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
462: #endif
463: if (result > 0) {
464: re = wr[j];
465: #if !defined(PETSC_USE_COMPLEX)
466: im = wi[j];
467: #endif
468: pos = j;
469: }
470: #if !defined(PETSC_USE_COMPLEX)
471: if (wi[j] != 0) j++;
472: #endif
473: }
474: if (pos) {
475: /* interchange blocks */
476: PetscBLASIntCast(pos+1,&ifst);
477: PetscBLASIntCast(i+1,&ilst);
478: #if !defined(PETSC_USE_COMPLEX)
479: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
480: #else
481: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
482: #endif
483: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
484: /* recover original eigenvalues from T matrix */
485: for (j=i;j<n;j++) {
486: wr[j] = T[j+j*ld];
487: #if !defined(PETSC_USE_COMPLEX)
488: if (j<n-1 && T[j+1+j*ld] != 0.0) {
489: /* complex conjugate eigenvalue */
490: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
491: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
492: wr[j+1] = wr[j];
493: wi[j+1] = -wi[j];
494: j++;
495: } else {
496: wi[j] = 0.0;
497: }
498: #endif
499: }
500: }
501: #if !defined(PETSC_USE_COMPLEX)
502: if (wi[i] != 0) i++;
503: #endif
504: }
505: return(0);
506: #endif
507: }
511: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
512: {
516: if (!rr || wr == rr) {
517: DSSort_NHEP_Total(ds,wr,wi);
518: } else {
519: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
520: }
521: return(0);
522: }
526: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
527: {
529: PetscInt i;
530: PetscBLASInt n,ld,incx=1;
531: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
534: PetscBLASIntCast(ds->n,&n);
535: PetscBLASIntCast(ds->ld,&ld);
536: A = ds->mat[DS_MAT_A];
537: Q = ds->mat[DS_MAT_Q];
538: DSAllocateWork_Private(ds,2*ld,0,0);
539: x = ds->work;
540: y = ds->work+ld;
541: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
542: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
543: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
544: ds->k = n;
545: return(0);
546: }
550: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
551: {
552: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
554: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
555: #else
557: PetscScalar *work,*tau;
558: PetscInt i,j;
559: PetscBLASInt ilo,lwork,info,n,ld;
560: PetscScalar *A = ds->mat[DS_MAT_A];
561: PetscScalar *Q = ds->mat[DS_MAT_Q];
564: #if !defined(PETSC_USE_COMPLEX)
566: #endif
567: PetscBLASIntCast(ds->n,&n);
568: PetscBLASIntCast(ds->ld,&ld);
569: PetscBLASIntCast(ds->l+1,&ilo);
570: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
571: tau = ds->work;
572: work = ds->work+ld;
573: lwork = ld*ld;
575: /* initialize orthogonal matrix */
576: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
577: for (i=0;i<n;i++)
578: Q[i+i*ld] = 1.0;
579: if (n==1) { /* quick return */
580: wr[0] = A[0];
581: wi[0] = 0.0;
582: return(0);
583: }
585: /* reduce to upper Hessenberg form */
586: if (ds->state<DS_STATE_INTERMEDIATE) {
587: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
588: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
589: for (j=0;j<n-1;j++) {
590: for (i=j+2;i<n;i++) {
591: Q[i+j*ld] = A[i+j*ld];
592: A[i+j*ld] = 0.0;
593: }
594: }
595: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
596: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
597: }
599: /* compute the (real) Schur form */
600: #if !defined(PETSC_USE_COMPLEX)
601: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
602: for (j=0;j<ds->l;j++) {
603: if (j==n-1 || A[j+1+j*ld] == 0.0) {
604: /* real eigenvalue */
605: wr[j] = A[j+j*ld];
606: wi[j] = 0.0;
607: } else {
608: /* complex eigenvalue */
609: wr[j] = A[j+j*ld];
610: wr[j+1] = A[j+j*ld];
611: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
612: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
613: wi[j+1] = -wi[j];
614: j++;
615: }
616: }
617: #else
618: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
619: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
620: #endif
621: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
622: return(0);
623: #endif
624: }
628: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
629: {
630: PetscInt i,newn,ld=ds->ld,l=ds->l;
631: PetscScalar *A;
634: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
635: A = ds->mat[DS_MAT_A];
636: /* be careful not to break a diagonal 2x2 block */
637: if (A[n+(n-1)*ld]==0.0) newn = n;
638: else {
639: if (n<ds->n-1) newn = n+1;
640: else newn = n-1;
641: }
642: if (ds->extrarow && ds->k==ds->n) {
643: /* copy entries of extra row to the new position, then clean last row */
644: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
645: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
646: }
647: ds->k = 0;
648: ds->n = newn;
649: return(0);
650: }
654: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
655: {
656: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
658: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
659: #else
661: PetscScalar *work;
662: PetscReal *rwork;
663: PetscBLASInt *ipiv;
664: PetscBLASInt lwork,info,n,ld;
665: PetscReal hn,hin;
666: PetscScalar *A;
669: PetscBLASIntCast(ds->n,&n);
670: PetscBLASIntCast(ds->ld,&ld);
671: lwork = 8*ld;
672: DSAllocateWork_Private(ds,lwork,ld,ld);
673: work = ds->work;
674: rwork = ds->rwork;
675: ipiv = ds->iwork;
677: /* use workspace matrix W to avoid overwriting A */
678: DSAllocateMat_Private(ds,DS_MAT_W);
679: A = ds->mat[DS_MAT_W];
680: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
682: /* norm of A */
683: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
684: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
686: /* norm of inv(A) */
687: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
688: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
689: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
690: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
691: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
693: *cond = hn*hin;
694: return(0);
695: #endif
696: }
700: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
701: {
702: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
704: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
705: #else
707: PetscInt i,j;
708: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
709: PetscScalar *A,*B,*Q,*g=gin,*ghat;
710: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
711: PetscReal gnorm;
714: PetscBLASIntCast(ds->n,&n);
715: PetscBLASIntCast(ds->ld,&ld);
716: A = ds->mat[DS_MAT_A];
718: if (!recover) {
720: DSAllocateWork_Private(ds,0,0,ld);
721: ipiv = ds->iwork;
722: if (!g) {
723: DSAllocateWork_Private(ds,ld,0,0);
724: g = ds->work;
725: }
726: /* use workspace matrix W to factor A-tau*eye(n) */
727: DSAllocateMat_Private(ds,DS_MAT_W);
728: B = ds->mat[DS_MAT_W];
729: PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);
731: /* Vector g initialy stores b = beta*e_n^T */
732: PetscMemzero(g,n*sizeof(PetscScalar));
733: g[n-1] = beta;
735: /* g = (A-tau*eye(n))'\b */
736: for (i=0;i<n;i++)
737: B[i+i*ld] -= tau;
738: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
739: if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
740: if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
741: PetscLogFlops(2.0*n*n*n/3.0);
742: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
743: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
744: PetscLogFlops(2.0*n*n-n);
746: /* A = A + g*b' */
747: for (i=0;i<n;i++)
748: A[i+(n-1)*ld] += g[i]*beta;
750: } else { /* recover */
753: DSAllocateWork_Private(ds,ld,0,0);
754: ghat = ds->work;
755: Q = ds->mat[DS_MAT_Q];
757: /* g^ = -Q(:,idx)'*g */
758: PetscBLASIntCast(ds->l+ds->k,&ncol);
759: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
761: /* A = A + g^*b' */
762: for (i=0;i<ds->l+ds->k;i++)
763: for (j=ds->l;j<ds->l+ds->k;j++)
764: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
766: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
767: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
768: }
770: /* Compute gamma factor */
771: if (gamma) {
772: gnorm = 0.0;
773: for (i=0;i<n;i++)
774: gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
775: *gamma = PetscSqrtReal(1.0+gnorm);
776: }
777: return(0);
778: #endif
779: }
783: PETSC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
784: {
786: ds->ops->allocate = DSAllocate_NHEP;
787: ds->ops->view = DSView_NHEP;
788: ds->ops->vectors = DSVectors_NHEP;
789: ds->ops->solve[0] = DSSolve_NHEP;
790: ds->ops->sort = DSSort_NHEP;
791: ds->ops->truncate = DSTruncate_NHEP;
792: ds->ops->update = DSUpdateExtraRow_NHEP;
793: ds->ops->cond = DSCond_NHEP;
794: ds->ops->transharm = DSTranslateHarmonic_NHEP;
795: ds->ops->normalize = DSNormalize_NHEP;
796: return(0);
797: }