Actual source code: nrefine.c
slepc-3.7.1 2016-05-27
1: /*
2: Newton refinement for polynomial eigenproblems.
4: References:
6: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
7: of invariant pairs for matrix polynomials", Linear Algebra Appl.
8: 435(3):514-536, 2011.
10: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
11: polynomial eigenvalue problems", submitted, 2015.
13: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14: SLEPc - Scalable Library for Eigenvalue Problem Computations
15: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
17: This file is part of SLEPc.
19: SLEPc is free software: you can redistribute it and/or modify it under the
20: terms of version 3 of the GNU Lesser General Public License as published by
21: the Free Software Foundation.
23: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
24: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26: more details.
28: You should have received a copy of the GNU Lesser General Public License
29: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
30: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
31: */
33: #include <slepc/private/pepimpl.h>
34: #include <slepcblaslapack.h>
36: typedef struct {
37: Mat *A,M1;
38: BV V,M2,M3,W;
39: PetscInt k,nmat;
40: PetscScalar *fih,*work,*M4;
41: PetscBLASInt *pM4;
42: PetscBool compM1;
43: Vec t;
44: } FSubctx;
46: typedef struct {
47: Mat E[2],M1;
48: Vec tN,ttN,t1,vseq;
49: VecScatter scatterctx;
50: PetscBool compM1;
51: PetscInt *map0,*map1,*idxg,*idxp;
52: PetscSubcomm subc;
53: VecScatter scatter_sub;
54: VecScatter *scatter_id,*scatterp_id;
55: Mat *A;
56: BV V,W,M2,M3,Wt;
57: PetscScalar *M4,*w,*wt,*d,*dt;
58: Vec t,tg,Rv,Vi,tp,tpg;
59: PetscInt idx,*cols;
60: } MatExplicitCtx;
64: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
65: {
66: #if defined(PETSC_MISSING_LAPACK_GETRS)
68: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
69: #else
71: FSubctx *ctx;
72: PetscInt k,i;
73: PetscScalar *c;
74: PetscBLASInt k_,one=1,info;
77: MatShellGetContext(M,&ctx);
78: VecCopy(x,ctx->t);
79: k = ctx->k;
80: c = ctx->work;
81: PetscBLASIntCast(k,&k_);
82: MatMult(ctx->M1,x,y);
83: VecConjugate(ctx->t);
84: BVDotVec(ctx->M3,ctx->t,c);
85: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
86: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
87: BVMultVec(ctx->M2,-1.0,1.0,y,c);
88: return(0);
89: #endif
90: }
94: /*
95: Evaluates the first d elements of the polynomial basis
96: on a given matrix H which is considered to be triangular
97: */
98: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
99: {
101: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
102: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
103: PetscScalar corr=0.0,alpha,beta;
104: PetscBLASInt k_,ldh_,ldfh_;
107: PetscBLASIntCast(ldh,&ldh_);
108: PetscBLASIntCast(k,&k_);
109: PetscBLASIntCast(ldfh,&ldfh_);
110: PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
111: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
112: if (nm>1) {
113: t = b[0]/a[0];
114: off = k;
115: for (j=0;j<k;j++) {
116: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
117: fH[j+j*ldfh] -= t;
118: }
119: }
120: for (i=2;i<nm;i++) {
121: off = i*k;
122: if (i==2) {
123: for (j=0;j<k;j++) {
124: fH[off+j+j*ldfh] = 1.0;
125: H[j+j*ldh] -= b[1];
126: }
127: } else {
128: for (j=0;j<k;j++) {
129: PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
130: H[j+j*ldh] += corr-b[i-1];
131: }
132: }
133: corr = b[i-1];
134: beta = -g[i-1]/a[i-1];
135: alpha = 1/a[i-1];
136: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
137: }
138: for (j=0;j<k;j++) H[j+j*ldh] += corr;
139: return(0);
140: }
144: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,FSubctx *ctx)
145: {
146: #if defined(PETSC_MISSING_LAPACK_GESV)
148: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV - Lapack routine is unavailable");
149: #else
150: PetscErrorCode ierr;
151: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
152: const PetscScalar *m3,*m2;
153: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
154: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
155: PetscBLASInt k_,lda_,lds_,nloc_,one=1,info;
156: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
157: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
158: MatStructure str;
159: Vec vc;
162: STGetMatStructure(pep->st,&str);
163: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
164: DHii = T12;
165: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
166: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
167: for (d=2;d<nmat;d++) {
168: for (j=0;j<k;j++) {
169: for (i=0;i<k;i++) {
170: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
171: }
172: }
173: }
174: /* T11 */
175: if (!ctx->compM1) {
176: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
177: PEPEvaluateBasis(pep,h,0,Ts,NULL);
178: for (j=1;j<nmat;j++) {
179: MatAXPY(M1,Ts[j],A[j],str);
180: }
181: }
183: /* T22 */
184: PetscBLASIntCast(lds,&lds_);
185: PetscBLASIntCast(k,&k_);
186: PetscBLASIntCast(lda,&lda_);
187: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
188: for (i=1;i<deg;i++) {
189: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
190: s = (i==1)?0.0:1.0;
191: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
192: }
193: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
195: /* T12 */
196: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
197: for (i=1;i<nmat;i++) {
198: MatDenseGetArray(Mk,&array);
199: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
200: MatDenseRestoreArray(Mk,&array);
201: BVSetActiveColumns(W,0,k);
202: BVMult(W,1.0,0.0,V,Mk);
203: if (i==1) {
204: BVMatMult(W,A[i],M2);
205: } else {
206: BVMatMult(W,A[i],M3); /* using M3 as work space */
207: BVMult(M2,1.0,1.0,M3,NULL);
208: }
209: }
211: /* T21 */
212: MatDenseGetArray(Mk,&array);
213: for (i=1;i<deg;i++) {
214: s = (i==1)?0.0:1.0;
215: ss = PetscConj(fh[i]);
216: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
217: }
218: MatDenseRestoreArray(Mk,&array);
219: BVSetActiveColumns(M3,0,k);
220: BVMult(M3,1.0,0.0,V,Mk);
221: for (i=0;i<k;i++) {
222: BVGetColumn(M3,i,&vc);
223: VecConjugate(vc);
224: BVRestoreColumn(M3,i,&vc);
225: }
226: MatDestroy(&Mk);
227: PetscFree3(T12,Tr,Ts);
229: VecGetLocalSize(ctx->t,&nloc);
230: PetscBLASIntCast(nloc,&nloc_);
231: PetscMalloc1(nloc*k,&T);
232: KSPGetOperators(pep->refineksp,NULL,&P);
233: if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
234: BVGetArrayRead(ctx->M2,&m2);
235: BVGetArrayRead(ctx->M3,&m3);
236: VecGetArray(ctx->t,&v);
237: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
238: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
239: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
240: VecRestoreArray(ctx->t,&v);
241: BVRestoreArrayRead(ctx->M2,&m2);
242: BVRestoreArrayRead(ctx->M3,&m3);
243: MatDiagonalSet(P,ctx->t,ADD_VALUES);
244: PetscFree(T);
245: KSPSetUp(pep->refineksp);
246: return(0);
247: #endif
248: }
252: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
253: {
254: #if defined(PETSC_MISSING_LAPACK_GETRS)
256: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
257: #else
258: PetscErrorCode ierr;
259: PetscScalar *t0;
260: PetscBLASInt k_,one=1,info,lda_;
261: PetscInt i,lda=nmat*k;
262: Mat M;
263: FSubctx *ctx;
264: KSPConvergedReason reason;
267: KSPGetOperators(ksp,&M,NULL);
268: MatShellGetContext(M,&ctx);
269: PetscCalloc1(k,&t0);
270: PetscBLASIntCast(lda,&lda_);
271: PetscBLASIntCast(k,&k_);
272: for (i=0;i<k;i++) t0[i] = Rh[i];
273: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
274: BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
275: KSPSolve(ksp,Rv,dVi);
276: KSPGetConvergedReason(ksp,&reason);
277: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
278: VecConjugate(dVi);
279: BVDotVec(ctx->M3,dVi,dHi);
280: VecConjugate(dVi);
281: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
282: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
283: PetscFree(t0);
284: return(0);
285: #endif
286: }
290: /*
291: Computes the residual P(H,V*S)*e_j for the polynomial
292: */
293: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
294: {
296: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
297: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
298: PetscInt i,ii,jj,lda;
299: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
300: Mat M0;
301: Vec w;
304: PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
305: lda = k*nmat;
306: PetscBLASIntCast(k,&k_);
307: PetscBLASIntCast(lds,&lds_);
308: PetscBLASIntCast(lda,&lda_);
309: PetscBLASIntCast(nmat,&nmat_);
310: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
311: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
312: BVSetActiveColumns(W,0,nmat);
313: BVMult(W,1.0,0.0,V,M0);
314: MatDestroy(&M0);
316: BVGetColumn(W,0,&w);
317: MatMult(A[0],w,Rv);
318: BVRestoreColumn(W,0,&w);
319: for (i=1;i<nmat;i++) {
320: BVGetColumn(W,i,&w);
321: MatMult(A[i],w,t);
322: BVRestoreColumn(W,i,&w);
323: VecAXPY(Rv,1.0,t);
324: }
325: /* Update right-hand side */
326: if (j) {
327: PetscBLASIntCast(ldh,&ldh_);
328: PetscMemzero(Z,k*k*sizeof(PetscScalar));
329: PetscMemzero(DS0,k*k*sizeof(PetscScalar));
330: PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
331: /* Update DfH */
332: for (i=1;i<nmat;i++) {
333: if (i>1) {
334: beta = -g[i-1];
335: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
336: tt += -b[i-1];
337: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
338: tt = b[i-1];
339: beta = 1.0/a[i-1];
340: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
341: F = DS0; DS0 = DS1; DS1 = F;
342: } else {
343: PetscMemzero(DS1,k*k*sizeof(PetscScalar));
344: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
345: }
346: for (jj=j;jj<k;jj++) {
347: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
348: }
349: }
350: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
351: /* Update right-hand side */
352: PetscBLASIntCast(2*k,&k2_);
353: PetscBLASIntCast(j,&j_);
354: PetscBLASIntCast(k+rds,&krds_);
355: c0 = DS0;
356: PetscMemzero(Rh,k*sizeof(PetscScalar));
357: for (i=0;i<nmat;i++) {
358: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
359: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
360: BVMultVec(V,1.0,0.0,t,h);
361: BVSetActiveColumns(dV,0,rds);
362: BVMultVec(dV,1.0,1.0,t,h+k);
363: BVGetColumn(W,i,&w);
364: MatMult(A[i],t,w);
365: BVRestoreColumn(W,i,&w);
366: if (i>0 && i<nmat-1) {
367: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
368: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
369: }
370: }
372: for (i=0;i<nmat;i++) h[i] = -1.0;
373: BVMultVec(W,1.0,1.0,Rv,h);
374: }
375: PetscFree4(h,DS0,DS1,Z);
376: return(0);
377: }
381: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
382: {
383: PetscErrorCode ierr;
384: PetscInt i,j,incf,incc;
385: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
386: Vec v,t,xx1;
387: BV WW,T;
388: KSPConvergedReason reason;
391: PetscMalloc3(sz,&y,sz,&g,k,&xx2);
392: if (trans) {
393: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
394: } else {
395: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
396: }
397: xx1 = vw;
398: VecCopy(x1,xx1);
399: PetscMemcpy(xx2,x2,sz*sizeof(PetscScalar));
400: PetscMemzero(sol2,k*sizeof(PetscScalar));
401: for (i=sz-1;i>=0;i--) {
402: BVGetColumn(WW,i,&v);
403: VecConjugate(v);
404: VecDot(xx1,v,y+i);
405: VecConjugate(v);
406: BVRestoreColumn(WW,i,&v);
407: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
408: y[i] = -(y[i]-xx2[i])/dd[i];
409: BVGetColumn(T,i,&t);
410: VecAXPY(xx1,-y[i],t);
411: BVRestoreColumn(T,i,&t);
412: for(j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
413: g[i] = xx2[i];
414: }
415: if (trans) {
416: KSPSolveTranspose(ksp,xx1,sol1);
417: KSPGetConvergedReason(ksp,&reason);
418: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
419: } else {
420: KSPSolve(ksp,xx1,sol1);
421: KSPGetConvergedReason(ksp,&reason);
422: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
423: }
424: if (trans) {
425: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
426: } else {
427: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
428: }
429: for (i=0;i<sz;i++) {
430: BVGetColumn(T,i,&t);
431: VecConjugate(t);
432: VecDot(sol1,t,&y2);
433: VecConjugate(t);
434: BVRestoreColumn(T,i,&t);
435: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
436: y2 = (g[i]-y2)/dd[i];
437: BVGetColumn(WW,i,&v);
438: VecAXPY(sol1,-y2,v);
439: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
440: sol2[i] = y[i]+y2;
441: BVRestoreColumn(WW,i,&v);
442: }
443: PetscFree3(y,g,xx2);
444: return(0);
445: }
449: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx)
450: {
452: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
453: Mat M1=matctx->M1,*A,*At,Mk;
454: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
455: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
456: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
457: PetscBLASInt lds_,lda_,k_;
458: MatStructure str;
459: PetscBool flg;
460: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
461: Vec vc,vc2;
464: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
465: STGetMatStructure(pep->st,&str);
466: STGetTransform(pep->st,&flg);
467: if (flg) {
468: PetscMalloc1(pep->nmat,&At);
469: for (i=0;i<pep->nmat;i++) {
470: STGetTOperators(pep->st,i,&At[i]);
471: }
472: } else At = pep->A;
473: if (matctx->subc) A = matctx->A;
474: else A = At;
475: /* Form the explicit system matrix */
476: DHii = T12;
477: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
478: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
479: for (l=2;l<nmat;l++) {
480: for (j=0;j<k;j++) {
481: for (i=0;i<k;i++) {
482: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
483: }
484: }
485: }
487: /* T11 */
488: if (!matctx->compM1) {
489: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
490: PEPEvaluateBasis(pep,h,0,Ts,NULL);
491: for (j=1;j<nmat;j++) {
492: MatAXPY(M1,Ts[j],A[j],str);
493: }
494: }
496: /* T22 */
497: PetscBLASIntCast(lds,&lds_);
498: PetscBLASIntCast(k,&k_);
499: PetscBLASIntCast(lda,&lda_);
500: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
501: for (i=1;i<deg;i++) {
502: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
503: s = (i==1)?0.0:1.0;
504: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
505: }
507: /* T12 */
508: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
509: for (i=1;i<nmat;i++) {
510: MatDenseGetArray(Mk,&array);
511: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
512: MatDenseRestoreArray(Mk,&array);
513: BVSetActiveColumns(W,0,k);
514: BVMult(W,1.0,0.0,V,Mk);
515: if (i==1) {
516: BVMatMult(W,A[i],M2);
517: } else {
518: BVMatMult(W,A[i],M3); /* using M3 as work space */
519: BVMult(M2,1.0,1.0,M3,NULL);
520: }
521: }
523: /* T21 */
524: MatDenseGetArray(Mk,&array);
525: for (i=1;i<deg;i++) {
526: s = (i==1)?0.0:1.0;
527: ss = PetscConj(fh[i]);
528: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
529: }
530: MatDenseRestoreArray(Mk,&array);
531: BVSetActiveColumns(M3,0,k);
532: BVMult(M3,1.0,0.0,V,Mk);
533: for (i=0;i<k;i++) {
534: BVGetColumn(M3,i,&vc);
535: VecConjugate(vc);
536: BVRestoreColumn(M3,i,&vc);
537: }
539: KSPSetOperators(ksp,M1,M1);
540: KSPSetUp(ksp);
541: MatDestroy(&Mk);
543: /* Set up for BEMW */
544: for (i=0;i<k;i++) {
545: BVGetColumn(M2,i,&vc);
546: BVGetColumn(W,i,&vc2);
547: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
548: BVRestoreColumn(M2,i,&vc);
549: BVGetColumn(M3,i,&vc);
550: VecConjugate(vc);
551: VecDot(vc2,vc,&d[i]);
552: VecConjugate(vc);
553: BVRestoreColumn(M3,i,&vc);
554: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
555: d[i] = M4[i+i*k]-d[i];
556: BVRestoreColumn(W,i,&vc2);
558: BVGetColumn(M3,i,&vc);
559: BVGetColumn(Wt,i,&vc2);
560: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
561: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
562: BVRestoreColumn(M3,i,&vc);
563: BVGetColumn(M2,i,&vc);
564: VecConjugate(vc2);
565: VecDot(vc,vc2,&dt[i]);
566: VecConjugate(vc2);
567: BVRestoreColumn(M2,i,&vc);
568: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
569: dt[i] = M4[i+i*k]-dt[i];
570: BVRestoreColumn(Wt,i,&vc2);
571: }
573: if (flg) {
574: PetscFree(At);
575: }
576: PetscFree3(T12,Tr,Ts);
577: return(0);
578: }
582: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W)
583: {
584: PetscErrorCode ierr;
585: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
586: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
587: Mat M,*E=matctx->E,*A,*At,Mk,Md;
588: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
589: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
590: PetscBLASInt lds_,lda_,k_;
591: const PetscInt *idxmc;
592: const PetscScalar *valsc,*carray;
593: MatStructure str;
594: Vec vc,vc0;
595: PetscBool flg;
598: PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
599: STGetMatStructure(pep->st,&str);
600: KSPGetOperators(ksp,&M,NULL);
601: MatGetOwnershipRange(E[1],&n1,&m1);
602: MatGetOwnershipRange(E[0],&n0,&m0);
603: MatGetOwnershipRange(M,&n,NULL);
604: PetscMalloc1(nmat,&ts);
605: STGetTransform(pep->st,&flg);
606: if (flg) {
607: PetscMalloc1(pep->nmat,&At);
608: for (i=0;i<pep->nmat;i++) {
609: STGetTOperators(pep->st,i,&At[i]);
610: }
611: } else At = pep->A;
612: if (matctx->subc) A = matctx->A;
613: else A = At;
614: /* Form the explicit system matrix */
615: DHii = T12;
616: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
617: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
618: for (d=2;d<nmat;d++) {
619: for (j=0;j<k;j++) {
620: for (i=0;i<k;i++) {
621: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
622: }
623: }
624: }
626: /* T11 */
627: if (!matctx->compM1) {
628: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
629: PEPEvaluateBasis(pep,h,0,Ts,NULL);
630: for (j=1;j<nmat;j++) {
631: MatAXPY(E[0],Ts[j],A[j],str);
632: }
633: }
634: for (i=n0;i<m0;i++) {
635: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
636: idx = n+i-n0;
637: for (j=0;j<ncols;j++) {
638: idxg[j] = matctx->map0[idxmc[j]];
639: }
640: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
641: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
642: }
644: /* T22 */
645: PetscBLASIntCast(lds,&lds_);
646: PetscBLASIntCast(k,&k_);
647: PetscBLASIntCast(lda,&lda_);
648: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
649: for (i=1;i<deg;i++) {
650: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
651: s = (i==1)?0.0:1.0;
652: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
653: }
654: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
655: for (i=0;i<m1-n1;i++) {
656: idx = n+m0-n0+i;
657: for (j=0;j<k;j++) {
658: Tr[j] = T22[n1+i+j*k];
659: }
660: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
661: }
663: /* T21 */
664: for (i=1;i<deg;i++) {
665: s = (i==1)?0.0:1.0;
666: ss = PetscConj(fh[i]);
667: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
668: }
669: BVSetActiveColumns(W,0,k);
670: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
671: BVMult(W,1.0,0.0,V,Mk);
672: for (i=0;i<k;i++) {
673: BVGetColumn(W,i,&vc);
674: VecConjugate(vc);
675: VecGetArrayRead(vc,&carray);
676: idx = matctx->map1[i];
677: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
678: VecRestoreArrayRead(vc,&carray);
679: BVRestoreColumn(W,i,&vc);
680: }
682: /* T12 */
683: for (i=1;i<nmat;i++) {
684: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
685: for (j=0;j<k;j++) {
686: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
687: }
688: }
689: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
690: for (i=0;i<nmat;i++) ts[i] = 1.0;
691: for (j=0;j<k;j++) {
692: MatDenseGetArray(Md,&array);
693: PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
694: MatDenseRestoreArray(Md,&array);
695: BVSetActiveColumns(W,0,nmat-1);
696: BVMult(W,1.0,0.0,V,Md);
697: for (i=nmat-1;i>0;i--) {
698: BVGetColumn(W,i-1,&vc0);
699: BVGetColumn(W,i,&vc);
700: MatMult(A[i],vc0,vc);
701: BVRestoreColumn(W,i-1,&vc0);
702: BVRestoreColumn(W,i,&vc);
703: }
704: BVSetActiveColumns(W,1,nmat);
705: BVGetColumn(W,0,&vc0);
706: BVMultVec(W,1.0,0.0,vc0,ts);
707: VecGetArrayRead(vc0,&carray);
708: idx = matctx->map1[j];
709: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
710: VecRestoreArrayRead(vc0,&carray);
711: BVRestoreColumn(W,0,&vc0);
712: }
713: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
714: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
715: KSPSetOperators(ksp,M,M);
716: KSPSetUp(ksp);
717: PetscFree(ts);
718: MatDestroy(&Mk);
719: MatDestroy(&Md);
720: if (flg) {
721: PetscFree(At);
722: }
723: PetscFree5(T22,T21,T12,Tr,Ts);
724: return(0);
725: }
729: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
730: {
731: PetscErrorCode ierr;
732: PetscInt n0,m0,n1,m1,i;
733: PetscScalar *arrayV;
734: const PetscScalar *array;
735: KSPConvergedReason reason;
738: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
739: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
741: /* Right side */
742: VecGetArrayRead(Rv,&array);
743: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
744: VecRestoreArrayRead(Rv,&array);
745: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
746: VecAssemblyBegin(matctx->tN);
747: VecAssemblyEnd(matctx->tN);
749: /* Solve */
750: KSPSolve(ksp,matctx->tN,matctx->ttN);
751: KSPGetConvergedReason(ksp,&reason);
752: if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
754: /* Retrieve solution */
755: VecGetArray(dVi,&arrayV);
756: VecGetArrayRead(matctx->ttN,&array);
757: PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
758: VecRestoreArray(dVi,&arrayV);
759: if (!matctx->subc) {
760: VecGetArray(matctx->t1,&arrayV);
761: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
762: VecRestoreArray(matctx->t1,&arrayV);
763: VecRestoreArrayRead(matctx->ttN,&array);
764: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
765: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
766: VecGetArrayRead(matctx->vseq,&array);
767: for (i=0;i<k;i++) dHi[i] = array[i];
768: VecRestoreArrayRead(matctx->vseq,&array);
769: }
770: return(0);
771: }
775: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W)
776: {
777: PetscErrorCode ierr;
778: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
779: PetscMPIInt root,len;
780: PetscScalar *array2,h;
781: const PetscScalar *array;
782: Vec R,Vi;
783: FSubctx *ctx;
784: Mat M;
787: if (!matctx || !matctx->subc) {
788: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
789: h = H[i+i*ldh];
790: idx = i;
791: R = Rv;
792: Vi = dVi;
793: switch (pep->scheme) {
794: case PEP_REFINE_SCHEME_EXPLICIT:
795: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
796: matctx->compM1 = PETSC_FALSE;
797: break;
798: case PEP_REFINE_SCHEME_MBE:
799: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
800: matctx->compM1 = PETSC_FALSE;
801: break;
802: case PEP_REFINE_SCHEME_SCHUR:
803: KSPGetOperators(ksp,&M,NULL);
804: MatShellGetContext(M,&ctx);
805: NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
806: ctx->compM1 = PETSC_FALSE;
807: break;
808: }
809: } else {
810: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
811: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
812: h = H[idx+idx*ldh];
813: matctx->idx = idx;
814: switch (pep->scheme) {
815: case PEP_REFINE_SCHEME_EXPLICIT:
816: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
817: matctx->compM1 = PETSC_FALSE;
818: break;
819: case PEP_REFINE_SCHEME_MBE:
820: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
821: matctx->compM1 = PETSC_FALSE;
822: break;
823: case PEP_REFINE_SCHEME_SCHUR:
824: break;
825: }
826: } else idx = matctx->idx;
827: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
828: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
829: VecGetArrayRead(matctx->tg,&array);
830: VecPlaceArray(matctx->t,array);
831: VecCopy(matctx->t,matctx->Rv);
832: VecResetArray(matctx->t);
833: VecRestoreArrayRead(matctx->tg,&array);
834: R = matctx->Rv;
835: Vi = matctx->Vi;
836: }
837: if (idx==i && idx<k) {
838: switch (pep->scheme) {
839: case PEP_REFINE_SCHEME_EXPLICIT:
840: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
841: break;
842: case PEP_REFINE_SCHEME_MBE:
843: NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
844: break;
845: case PEP_REFINE_SCHEME_SCHUR:
846: NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
847: break;
848: }
849: }
850: if (matctx && matctx->subc) {
851: VecGetLocalSize(Vi,&m);
852: VecGetArrayRead(Vi,&array);
853: VecGetArray(matctx->tg,&array2);
854: PetscMemcpy(array2,array,m*sizeof(PetscScalar));
855: VecRestoreArray(matctx->tg,&array2);
856: VecRestoreArrayRead(Vi,&array);
857: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
858: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
859: switch (pep->scheme) {
860: case PEP_REFINE_SCHEME_EXPLICIT:
861: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
862: VecGetArrayRead(matctx->ttN,&array);
863: VecPlaceArray(matctx->tp,array+m0-n0);
864: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
865: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
866: VecResetArray(matctx->tp);
867: VecRestoreArrayRead(matctx->ttN,&array);
868: VecGetArrayRead(matctx->tpg,&array);
869: for (j=0;j<k;j++) dHi[j] = array[j];
870: VecRestoreArrayRead(matctx->tpg,&array);
871: break;
872: case PEP_REFINE_SCHEME_MBE:
873: root = 0;
874: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
875: PetscMPIIntCast(k,&len);
876: MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
877: break;
878: case PEP_REFINE_SCHEME_SCHUR:
879: break;
880: }
881: }
882: return(0);
883: }
887: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,MatExplicitCtx *matctx)
888: {
890: PetscInt i,nmat=pep->nmat,lda=nmat*k;
891: PetscScalar *fh,*Rh,*DfH;
892: PetscReal norm;
893: BV W;
894: Vec Rv,t,dvi;
895: FSubctx *ctx;
896: Mat M,*At;
897: PetscBool flg,lindep;
900: PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
901: *rds = 0;
902: BVCreateVec(pep->V,&Rv);
903: switch (pep->scheme) {
904: case PEP_REFINE_SCHEME_EXPLICIT:
905: BVCreateVec(pep->V,&t);
906: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
907: PetscMalloc1(nmat,&fh);
908: break;
909: case PEP_REFINE_SCHEME_MBE:
910: if (matctx->subc) {
911: BVCreateVec(pep->V,&t);
912: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
913: } else {
914: W = matctx->W;
915: PetscObjectReference((PetscObject)W);
916: t = matctx->t;
917: PetscObjectReference((PetscObject)t);
918: }
919: BVScale(matctx->W,0.0);
920: BVScale(matctx->Wt,0.0);
921: BVScale(matctx->M2,0.0);
922: BVScale(matctx->M3,0.0);
923: PetscMalloc1(nmat,&fh);
924: break;
925: case PEP_REFINE_SCHEME_SCHUR:
926: KSPGetOperators(ksp,&M,NULL);
927: MatShellGetContext(M,&ctx);
928: BVCreateVec(pep->V,&t);
929: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
930: fh = ctx->fih;
931: break;
932: }
933: PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
934: PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
935: STGetTransform(pep->st,&flg);
936: if (flg) {
937: PetscMalloc1(pep->nmat,&At);
938: for (i=0;i<pep->nmat;i++) {
939: STGetTOperators(pep->st,i,&At[i]);
940: }
941: } else At = pep->A;
943: /* Main loop for computing the ith columns of dX and dS */
944: for (i=0;i<k;i++) {
945: /* Compute and update i-th column of the right hand side */
946: PetscMemzero(Rh,k*sizeof(PetscScalar));
947: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);
949: /* Update and solve system */
950: BVGetColumn(dV,i,&dvi);
951: NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
952: /* Orthogonalize computed solution */
953: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
954: BVRestoreColumn(dV,i,&dvi);
955: if (!lindep) {
956: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
957: if (!lindep) {
958: dVS[k+i+i*2*k] = norm;
959: BVScaleColumn(dV,i,1.0/norm);
960: (*rds)++;
961: }
962: }
963: }
964: BVSetActiveColumns(dV,0,*rds);
965: VecDestroy(&t);
966: VecDestroy(&Rv);
967: BVDestroy(&W);
968: if (flg) {
969: PetscFree(At);
970: }
971: PetscFree2(DfH,Rh);
972: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
973: return(0);
974: }
978: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscInt *prs)
979: {
980: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
982: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
983: #else
985: PetscInt i,j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,rs=*prs,ldg;
986: PetscScalar *T,*G,*tau,*array,sone=1.0,zero=0.0,*work;
987: PetscBLASInt rs_,lds_,k_,ldh_,info,ldg_,lda_;
988: Mat M0;
991: PetscMalloc4(rs*k,&T,k,&tau,k,&work,deg*k*k,&G);
992: PetscBLASIntCast(lds,&lds_);
993: PetscBLASIntCast(lda,&lda_);
994: PetscBLASIntCast(k,&k_);
995: if (rs>k) { /* Truncate S to have k columns*/
996: for (j=0;j<k;j++) {
997: PetscMemcpy(T+j*rs,S+j*lds,rs*sizeof(PetscScalar));
998: }
999: PetscBLASIntCast(rs,&rs_);
1000: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&rs_,&k_,T,&rs_,tau,work,&k_,&info));
1001: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1002: /* Copy triangular matrix in S */
1003: PetscMemzero(S,lds*k*sizeof(PetscScalar));
1004: for (j=0;j<k;j++) for (i=0;i<=j;i++) S[j*lds+i] = T[j*rs+i];
1005: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&rs_,&k_,&k_,T,&rs_,tau,work,&k_,&info));
1006: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
1007: MatCreateSeqDense(PETSC_COMM_SELF,rs,k,NULL,&M0);
1008: MatDenseGetArray(M0,&array);
1009: for (j=0;j<k;j++) {
1010: PetscMemcpy(array+j*rs,T+j*rs,rs*sizeof(PetscScalar));
1011: }
1012: MatDenseRestoreArray(M0,&array);
1013: BVSetActiveColumns(pep->V,0,rs);
1014: BVMultInPlace(pep->V,M0,0,k);
1015: BVSetActiveColumns(pep->V,0,k);
1016: MatDestroy(&M0);
1017: *prs = rs = k;
1018: }
1019: /* Form auxiliary matrix for the orthogonalization step */
1020: ldg = deg*k;
1021: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1022: PetscBLASIntCast(ldg,&ldg_);
1023: PetscBLASIntCast(ldh,&ldh_);
1024: for (j=0;j<deg;j++) {
1025: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
1026: }
1027: /* Orthogonalize and update S */
1028: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
1029: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1030: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
1032: /* Update H */
1033: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
1034: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
1035: PetscFree4(T,tau,work,G);
1036: return(0);
1037: #endif
1038: }
1042: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
1043: {
1044: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
1046: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
1047: #else
1049: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
1050: PetscScalar *tau,*array,*work;
1051: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
1052: Mat M0;
1055: PetscMalloc2(k,&tau,k,&work);
1056: PetscBLASIntCast(lds,&lds_);
1057: PetscBLASIntCast(lda,&lda_);
1058: PetscBLASIntCast(ldh,&ldh_);
1059: PetscBLASIntCast(k,&k_);
1060: PetscBLASIntCast(2*k,&k2_);
1061: PetscBLASIntCast((k+rds),&kdrs_);
1062: /* Update H */
1063: for (j=0;j<k;j++) {
1064: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
1065: }
1066: /* Update V */
1067: for (j=0;j<k;j++) {
1068: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
1069: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
1070: }
1071: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
1072: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1073: /* Copy triangular matrix in S */
1074: for (j=0;j<k;j++) {
1075: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
1076: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
1077: }
1078: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
1079: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
1080: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1081: MatDenseGetArray(M0,&array);
1082: for (j=0;j<k;j++) {
1083: PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
1084: }
1085: MatDenseRestoreArray(M0,&array);
1086: BVMultInPlace(pep->V,M0,0,k);
1087: if (rds) {
1088: MatDenseGetArray(M0,&array);
1089: for (j=0;j<k;j++) {
1090: PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
1091: }
1092: MatDenseRestoreArray(M0,&array);
1093: BVMultInPlace(dV,M0,0,k);
1094: BVMult(pep->V,1.0,1.0,dV,NULL);
1095: }
1096: MatDestroy(&M0);
1097: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,&k);
1098: PetscFree2(tau,work);
1099: return(0);
1100: #endif
1101: }
1105: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,MatExplicitCtx *matctx,PetscBool ini)
1106: {
1107: PetscErrorCode ierr;
1108: FSubctx *ctx;
1109: PetscScalar t,*coef;
1110: const PetscScalar *array;
1111: MatStructure str;
1112: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1113: MPI_Comm comm;
1114: PetscMPIInt np;
1115: const PetscInt *rgs0,*rgs1;
1116: Mat B,C,*E,*A,*At;
1117: IS is1,is2;
1118: Vec v;
1119: PetscBool flg;
1120: Mat M,P;
1123: PetscMalloc1(nmat,&coef);
1124: STGetTransform(pep->st,&flg);
1125: if (flg) {
1126: PetscMalloc1(pep->nmat,&At);
1127: for (i=0;i<pep->nmat;i++) {
1128: STGetTOperators(pep->st,i,&At[i]);
1129: }
1130: } else At = pep->A;
1131: switch (pep->scheme) {
1132: case PEP_REFINE_SCHEME_EXPLICIT:
1133: if (ini) {
1134: if (matctx->subc) {
1135: A = matctx->A;
1136: comm = PetscSubcommChild(matctx->subc);
1137: } else {
1138: A = At;
1139: PetscObjectGetComm((PetscObject)pep,&comm);
1140: }
1141: E = matctx->E;
1142: STGetMatStructure(pep->st,&str);
1143: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1144: j = (matctx->subc)?matctx->subc->color:0;
1145: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1146: for (j=1;j<nmat;j++) {
1147: MatAXPY(E[0],coef[j],A[j],str);
1148: }
1149: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1150: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1151: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1152: MatGetOwnershipRange(E[0],&n0,&m0);
1153: MatGetOwnershipRange(E[1],&n1,&m1);
1154: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1155: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1156: /* T12 and T21 are computed from V and V*, so,
1157: they must have the same column and row ranges */
1158: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1159: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1160: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1161: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1162: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1163: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1164: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1165: SlepcMatTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1166: MatDestroy(&B);
1167: MatDestroy(&C);
1168: matctx->compM1 = PETSC_TRUE;
1169: MatGetSize(E[0],NULL,&N0);
1170: MatGetSize(E[1],NULL,&N1);
1171: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1172: MatGetOwnershipRanges(E[0],&rgs0);
1173: MatGetOwnershipRanges(E[1],&rgs1);
1174: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1175: /* Create column (and row) mapping */
1176: for (p=0;p<np;p++) {
1177: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1178: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1179: }
1180: MatCreateVecs(M,NULL,&matctx->tN);
1181: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1182: VecDuplicate(matctx->tN,&matctx->ttN);
1183: if (matctx->subc) {
1184: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1185: count = np*k;
1186: PetscMalloc2(count,&idx1,count,&idx2);
1187: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1188: VecGetOwnershipRange(matctx->tp,&l0,NULL);
1189: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1190: for (si=0;si<matctx->subc->n;si++) {
1191: if (matctx->subc->color==si) {
1192: j=0;
1193: if (matctx->subc->color==si) {
1194: for (p=0;p<np;p++) {
1195: for (i=n1;i<m1;i++) {
1196: idx1[j] = l0+i-n1;
1197: idx2[j++] =p*k+i;
1198: }
1199: }
1200: }
1201: count = np*(m1-n1);
1202: } else count =0;
1203: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1204: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1205: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1206: ISDestroy(&is1);
1207: ISDestroy(&is2);
1208: }
1209: PetscFree2(idx1,idx2);
1210: } else {
1211: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1212: }
1213: P = M;
1214: } else {
1215: if (matctx->subc) {
1216: /* Scatter vectors pep->V */
1217: for (i=0;i<k;i++) {
1218: BVGetColumn(pep->V,i,&v);
1219: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1220: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1221: BVRestoreColumn(pep->V,i,&v);
1222: VecGetArrayRead(matctx->tg,&array);
1223: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1224: BVInsertVec(matctx->V,i,matctx->t);
1225: VecResetArray(matctx->t);
1226: VecRestoreArrayRead(matctx->tg,&array);
1227: }
1228: }
1229: }
1230: break;
1231: case PEP_REFINE_SCHEME_MBE:
1232: if (ini) {
1233: if (matctx->subc) {
1234: A = matctx->A;
1235: comm = PetscSubcommChild(matctx->subc);
1236: } else {
1237: matctx->V = pep->V;
1238: A = At;
1239: PetscObjectGetComm((PetscObject)pep,&comm);
1240: MatCreateVecs(pep->A[0],&matctx->t,NULL);
1241: }
1242: STGetMatStructure(pep->st,&str);
1243: MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1244: j = (matctx->subc)?matctx->subc->color:0;
1245: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1246: for (j=1;j<nmat;j++) {
1247: MatAXPY(matctx->M1,coef[j],A[j],str);
1248: }
1249: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1250: BVDuplicateResize(matctx->V,k,&matctx->M2);
1251: BVDuplicate(matctx->M2,&matctx->M3);
1252: BVDuplicate(matctx->M2,&matctx->Wt);
1253: PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1254: matctx->compM1 = PETSC_TRUE;
1255: M = matctx->M1;
1256: P = M;
1257: }
1258: break;
1259: case PEP_REFINE_SCHEME_SCHUR:
1260: if (ini) {
1261: PetscObjectGetComm((PetscObject)pep,&comm);
1262: MatGetSize(At[0],&m0,&n0);
1263: PetscMalloc1(1,&ctx);
1264: STGetMatStructure(pep->st,&str);
1265: /* Create a shell matrix to solve the linear system */
1266: ctx->A = At;
1267: ctx->V = pep->V;
1268: ctx->k = k; ctx->nmat = nmat;
1269: PetscMalloc4(k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1270: PetscMemzero(ctx->M4,k*k*sizeof(PetscScalar));
1271: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1272: MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatFSMult);
1273: BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1274: BVDuplicateResize(ctx->V,k,&ctx->M2);
1275: BVDuplicate(ctx->M2,&ctx->M3);
1276: BVCreateVec(pep->V,&ctx->t);
1277: MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1278: PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1279: for (j=1;j<nmat;j++) {
1280: MatAXPY(ctx->M1,coef[j],At[j],str);
1281: }
1282: MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1283: /* Compute a precond matrix for the system */
1284: t = H[0];
1285: PEPEvaluateBasis(pep,t,0,coef,NULL);
1286: for (j=1;j<nmat;j++) {
1287: MatAXPY(P,coef[j],At[j],str);
1288: }
1289: ctx->compM1 = PETSC_TRUE;
1290: }
1291: break;
1292: }
1293: if (ini) {
1294: PEPRefineGetKSP(pep,&pep->refineksp);
1295: KSPSetOperators(pep->refineksp,M,P);
1296: KSPSetFromOptions(pep->refineksp);
1297: }
1299: if (!ini && matctx && matctx->subc) {
1300: /* Scatter vectors pep->V */
1301: for (i=0;i<k;i++) {
1302: BVGetColumn(pep->V,i,&v);
1303: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1304: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1305: BVRestoreColumn(pep->V,i,&v);
1306: VecGetArrayRead(matctx->tg,&array);
1307: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1308: BVInsertVec(matctx->V,i,matctx->t);
1309: VecResetArray(matctx->t);
1310: VecRestoreArrayRead(matctx->tg,&array);
1311: }
1312: }
1313: PetscFree(coef);
1314: if (flg) {
1315: PetscFree(At);
1316: }
1317: return(0);
1318: }
1322: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
1323: {
1324: PetscErrorCode ierr;
1325: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1326: IS is1,is2;
1327: BVType type;
1328: Vec v;
1329: const PetscScalar *array;
1330: Mat *A;
1331: PetscBool flg;
1334: STGetTransform(pep->st,&flg);
1335: if (flg) {
1336: PetscMalloc1(pep->nmat,&A);
1337: for (i=0;i<pep->nmat;i++) {
1338: STGetTOperators(pep->st,i,&A[i]);
1339: }
1340: } else A = pep->A;
1342: /* Duplicate pep matrices */
1343: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1344: for (i=0;i<pep->nmat;i++) {
1345: MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1346: }
1348: /* Create Scatter */
1349: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1350: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1351: VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1352: BVGetColumn(pep->V,0,&v);
1353: VecGetOwnershipRange(v,&n0,&m0);
1354: nloc0 = m0-n0;
1355: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1356: j = 0;
1357: for (si=0;si<matctx->subc->n;si++) {
1358: for (i=n0;i<m0;i++) {
1359: idx1[j] = i;
1360: idx2[j++] = i+pep->n*si;
1361: }
1362: }
1363: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1364: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1365: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1366: ISDestroy(&is1);
1367: ISDestroy(&is2);
1368: for (si=0;si<matctx->subc->n;si++) {
1369: j=0;
1370: for (i=n0;i<m0;i++) {
1371: idx1[j] = i;
1372: idx2[j++] = i+pep->n*si;
1373: }
1374: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1375: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1376: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1377: ISDestroy(&is1);
1378: ISDestroy(&is2);
1379: }
1380: BVRestoreColumn(pep->V,0,&v);
1381: PetscFree2(idx1,idx2);
1383: /* Duplicate pep->V vecs */
1384: BVGetType(pep->V,&type);
1385: BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1386: BVSetType(matctx->V,type);
1387: BVSetSizesFromVec(matctx->V,matctx->t,k);
1388: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1389: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1390: }
1391: for (i=0;i<k;i++) {
1392: BVGetColumn(pep->V,i,&v);
1393: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1394: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1395: BVRestoreColumn(pep->V,i,&v);
1396: VecGetArrayRead(matctx->tg,&array);
1397: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1398: BVInsertVec(matctx->V,i,matctx->t);
1399: VecResetArray(matctx->t);
1400: VecRestoreArrayRead(matctx->tg,&array);
1401: }
1403: VecDuplicate(matctx->t,&matctx->Rv);
1404: VecDuplicate(matctx->t,&matctx->Vi);
1405: if (flg) {
1406: PetscFree(A);
1407: }
1408: return(0);
1409: }
1413: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1414: {
1416: PetscInt i;
1419: VecScatterDestroy(&matctx->scatter_sub);
1420: for (i=0;i<matctx->subc->n;i++) {
1421: VecScatterDestroy(&matctx->scatter_id[i]);
1422: }
1423: for (i=0;i<pep->nmat;i++) {
1424: MatDestroy(&matctx->A[i]);
1425: }
1426: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1427: for (i=0;i<matctx->subc->n;i++) {
1428: VecScatterDestroy(&matctx->scatterp_id[i]);
1429: }
1430: VecDestroy(&matctx->tp);
1431: VecDestroy(&matctx->tpg);
1432: BVDestroy(&matctx->W);
1433: }
1434: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1435: BVDestroy(&matctx->V);
1436: VecDestroy(&matctx->t);
1437: VecDestroy(&matctx->tg);
1438: VecDestroy(&matctx->Rv);
1439: VecDestroy(&matctx->Vi);
1440: return(0);
1441: }
1445: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds,PetscInt *prs)
1446: {
1447: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI)
1449: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI - Lapack routine is unavailable");
1450: #else
1452: PetscScalar *H,*work,*dH,*fH,*dVS;
1453: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1454: PetscBLASInt k_,ld_,*p,info;
1455: BV dV;
1456: PetscBool sinvert,flg;
1457: MatExplicitCtx *matctx=NULL;
1458: Vec v;
1459: Mat M,P;
1460: FSubctx *ctx;
1463: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1464: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1465: /* the input tolerance is not being taken into account (by the moment) */
1466: its = *maxits;
1467: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1468: DSGetLeadingDimension(pep->ds,&ldh);
1469: DSGetArray(pep->ds,DS_MAT_A,&H);
1470: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1471: PetscMalloc1(2*k*k,&dVS);
1472: STGetTransform(pep->st,&flg);
1473: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1474: PetscBLASIntCast(k,&k_);
1475: PetscBLASIntCast(ldh,&ld_);
1476: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1477: if (sinvert) {
1478: DSGetArray(pep->ds,DS_MAT_A,&H);
1479: PetscMalloc1(k,&p);
1480: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1481: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1482: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1483: pep->ops->backtransform = NULL;
1484: }
1485: if (sigma!=0.0) {
1486: DSGetArray(pep->ds,DS_MAT_A,&H);
1487: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1488: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1489: pep->ops->backtransform = NULL;
1490: }
1491: }
1492: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1493: DSGetArray(pep->ds,DS_MAT_A,&H);
1494: for (j=0;j<k;j++) {
1495: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1496: }
1497: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1498: if (!flg) {
1499: /* Restore original values */
1500: for (i=0;i<pep->nmat;i++){
1501: pep->pbc[pep->nmat+i] *= pep->sfactor;
1502: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1503: }
1504: }
1505: }
1506: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1507: for (i=0;i<k;i++) {
1508: BVGetColumn(pep->V,i,&v);
1509: VecPointwiseMult(v,v,pep->Dr);
1510: BVRestoreColumn(pep->V,i,&v);
1511: }
1512: }
1513: DSGetArray(pep->ds,DS_MAT_A,&H);
1515: NRefOrthogStep(pep,k,H,ldh,fH,S,lds,prs);
1516: /* check if H is in Schur form */
1517: for (i=0;i<k-1;i++) {
1518: if (H[i+1+i*ldh]!=0.0) {
1519: #if !defined(PETSC_USES_COMPLEX)
1520: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1521: #else
1522: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1523: #endif
1524: }
1525: }
1526: if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1527: BVSetActiveColumns(pep->V,0,k);
1528: BVDuplicateResize(pep->V,k,&dV);
1529: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1530: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1531: PetscMalloc1(1,&matctx);
1532: if (nsubc>1) { /* spliting in subcommunicators */
1533: matctx->subc = pep->refinesubc;
1534: NRefSubcommSetup(pep,k,matctx,nsubc);
1535: } else matctx->subc=NULL;
1536: }
1538: /* Loop performing iterative refinements */
1539: for (i=0;i<its;i++) {
1540: /* Pre-compute the polynomial basis evaluated in H */
1541: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1542: PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1543: /* Solve the linear system */
1544: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1545: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1546: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1547: }
1548: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1549: if (!flg && sinvert) {
1550: PetscFree(p);
1551: }
1552: PetscFree3(dH,fH,work);
1553: PetscFree(dVS);
1554: BVDestroy(&dV);
1555: switch (pep->scheme) {
1556: case PEP_REFINE_SCHEME_EXPLICIT:
1557: for (i=0;i<2;i++) {
1558: MatDestroy(&matctx->E[i]);
1559: }
1560: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1561: VecDestroy(&matctx->tN);
1562: VecDestroy(&matctx->ttN);
1563: VecDestroy(&matctx->t1);
1564: if (nsubc>1) {
1565: NRefSubcommDestroy(pep,matctx);
1566: } else {
1567: VecDestroy(&matctx->vseq);
1568: VecScatterDestroy(&matctx->scatterctx);
1569: }
1570: PetscFree(matctx);
1571: KSPGetOperators(pep->refineksp,&M,NULL);
1572: MatDestroy(&M);
1573: break;
1574: case PEP_REFINE_SCHEME_MBE:
1575: BVDestroy(&matctx->W);
1576: BVDestroy(&matctx->Wt);
1577: BVDestroy(&matctx->M2);
1578: BVDestroy(&matctx->M3);
1579: MatDestroy(&matctx->M1);
1580: VecDestroy(&matctx->t);
1581: PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1582: if (nsubc>1) {
1583: NRefSubcommDestroy(pep,matctx);
1584: }
1585: PetscFree(matctx);
1586: break;
1587: case PEP_REFINE_SCHEME_SCHUR:
1588: KSPGetOperators(pep->refineksp,&M,&P);
1589: MatShellGetContext(M,&ctx);
1590: PetscFree4(ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1591: MatDestroy(&ctx->M1);
1592: BVDestroy(&ctx->M2);
1593: BVDestroy(&ctx->M3);
1594: BVDestroy(&ctx->W);
1595: VecDestroy(&ctx->t);
1596: PetscFree(ctx);
1597: MatDestroy(&M);
1598: MatDestroy(&P);
1599: break;
1600: }
1601: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1602: return(0);
1603: #endif
1604: }