Actual source code: nrefine.c

slepc-3.7.0 2016-05-16
Report Typos and Errors
  1: /*
  2:    Newton refinement for polynomial eigenproblems.

  4:    References:

  6:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
  7:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
  8:            435(3):514-536, 2011.

 10:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 11:            polynomial eigenvalue problems", submitted, 2015.

 13:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 14:    SLEPc - Scalable Library for Eigenvalue Problem Computations
 15:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

 17:    This file is part of SLEPc.

 19:    SLEPc is free software: you can redistribute it and/or modify it under  the
 20:    terms of version 3 of the GNU Lesser General Public License as published by
 21:    the Free Software Foundation.

 23:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 24:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 25:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 26:    more details.

 28:    You  should have received a copy of the GNU Lesser General  Public  License
 29:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 30:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 31: */

 33: #include <slepc/private/pepimpl.h>
 34: #include <slepcblaslapack.h>

 36: typedef struct {
 37:   Mat          *A,M1;
 38:   BV           V,M2,M3,W;
 39:   PetscInt     k,nmat;
 40:   PetscScalar  *fih,*work,*M4;
 41:   PetscBLASInt *pM4;
 42:   PetscBool    compM1;
 43:   Vec          t;
 44: } FSubctx;

 46: typedef struct {
 47:   Mat          E[2],M1;
 48:   Vec          tN,ttN,t1,vseq;
 49:   VecScatter   scatterctx;
 50:   PetscBool    compM1;
 51:   PetscInt     *map0,*map1,*idxg,*idxp;
 52:   PetscSubcomm subc;
 53:   VecScatter   scatter_sub;
 54:   VecScatter   *scatter_id,*scatterp_id;
 55:   Mat          *A;
 56:   BV           V,W,M2,M3,Wt;
 57:   PetscScalar  *M4,*w,*wt,*d,*dt;
 58:   Vec          t,tg,Rv,Vi,tp,tpg;
 59:   PetscInt     idx,*cols;
 60: } MatExplicitCtx;

 64: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
 65: {
 66: #if defined(PETSC_MISSING_LAPACK_GETRS)
 68:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
 69: #else
 71:   FSubctx        *ctx;
 72:   PetscInt       k,i;
 73:   PetscScalar    *c;
 74:   PetscBLASInt   k_,one=1,info;

 77:   MatShellGetContext(M,&ctx);
 78:   VecCopy(x,ctx->t);
 79:   k    = ctx->k;
 80:   c    = ctx->work;
 81:   PetscBLASIntCast(k,&k_);
 82:   MatMult(ctx->M1,x,y);
 83:   VecConjugate(ctx->t);
 84:   BVDotVec(ctx->M3,ctx->t,c);
 85:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 86:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 87:   BVMultVec(ctx->M2,-1.0,1.0,y,c);
 88:   return(0);
 89: #endif
 90: }

 94: /*
 95:   Evaluates the first d elements of the polynomial basis
 96:   on a given matrix H which is considered to be triangular
 97: */
 98: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 99: {
101:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
102:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
103:   PetscScalar    corr=0.0,alpha,beta;
104:   PetscBLASInt   k_,ldh_,ldfh_;

107:   PetscBLASIntCast(ldh,&ldh_);
108:   PetscBLASIntCast(k,&k_);
109:   PetscBLASIntCast(ldfh,&ldfh_);
110:   PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
111:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
112:   if (nm>1) {
113:     t = b[0]/a[0];
114:     off = k;
115:     for (j=0;j<k;j++) {
116:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
117:       fH[j+j*ldfh] -= t;
118:     }
119:   }
120:   for (i=2;i<nm;i++) {
121:     off = i*k;
122:     if (i==2) {
123:       for (j=0;j<k;j++) {
124:         fH[off+j+j*ldfh] = 1.0;
125:         H[j+j*ldh] -= b[1];
126:       }
127:     } else {
128:       for (j=0;j<k;j++) {
129:         PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
130:         H[j+j*ldh] += corr-b[i-1];
131:       }
132:     }
133:     corr  = b[i-1];
134:     beta  = -g[i-1]/a[i-1];
135:     alpha = 1/a[i-1];
136:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
137:   }
138:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
139:   return(0);
140: }

144: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,FSubctx *ctx)
145: {
146: #if defined(PETSC_MISSING_LAPACK_GESV)
148:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV - Lapack routine is unavailable");
149: #else
150:   PetscErrorCode    ierr;
151:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
152:   const PetscScalar *m3,*m2;
153:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
154:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
155:   PetscBLASInt      k_,lda_,lds_,nloc_,one=1,info;
156:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
157:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
158:   MatStructure      str;
159:   Vec               vc;

162:   STGetMatStructure(pep->st,&str);
163:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
164:   DHii = T12;
165:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
166:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
167:   for (d=2;d<nmat;d++) {
168:     for (j=0;j<k;j++) {
169:       for (i=0;i<k;i++) {
170:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
171:       }
172:     }
173:   }
174:   /* T11 */
175:   if (!ctx->compM1) {
176:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
177:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
178:     for (j=1;j<nmat;j++) {
179:       MatAXPY(M1,Ts[j],A[j],str);
180:     }
181:   }

183:   /* T22 */
184:   PetscBLASIntCast(lds,&lds_);
185:   PetscBLASIntCast(k,&k_);
186:   PetscBLASIntCast(lda,&lda_);
187:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
188:   for (i=1;i<deg;i++) {
189:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
190:     s = (i==1)?0.0:1.0;
191:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
192:   }
193:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

195:   /* T12 */
196:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
197:   for (i=1;i<nmat;i++) {
198:     MatDenseGetArray(Mk,&array);
199:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
200:     MatDenseRestoreArray(Mk,&array);
201:     BVSetActiveColumns(W,0,k);
202:     BVMult(W,1.0,0.0,V,Mk);
203:     if (i==1) {
204:       BVMatMult(W,A[i],M2);
205:     } else {
206:       BVMatMult(W,A[i],M3); /* using M3 as work space */
207:       BVMult(M2,1.0,1.0,M3,NULL);
208:     }
209:   }

211:   /* T21 */
212:   MatDenseGetArray(Mk,&array);
213:   for (i=1;i<deg;i++) {
214:     s = (i==1)?0.0:1.0;
215:     ss = PetscConj(fh[i]);
216:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
217:   }
218:   MatDenseRestoreArray(Mk,&array);
219:   BVSetActiveColumns(M3,0,k);
220:   BVMult(M3,1.0,0.0,V,Mk);
221:   for (i=0;i<k;i++) {
222:     BVGetColumn(M3,i,&vc);
223:     VecConjugate(vc);
224:     BVRestoreColumn(M3,i,&vc);
225:   }
226:   MatDestroy(&Mk);
227:   PetscFree3(T12,Tr,Ts);

229:   VecGetLocalSize(ctx->t,&nloc);
230:   PetscBLASIntCast(nloc,&nloc_);
231:   PetscMalloc1(nloc*k,&T);
232:   KSPGetOperators(pep->refineksp,NULL,&P);
233:   if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
234:   BVGetArrayRead(ctx->M2,&m2);
235:   BVGetArrayRead(ctx->M3,&m3);
236:   VecGetArray(ctx->t,&v);
237:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
238:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
239:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
240:   VecRestoreArray(ctx->t,&v);
241:   BVRestoreArrayRead(ctx->M2,&m2);
242:   BVRestoreArrayRead(ctx->M3,&m3);
243:   MatDiagonalSet(P,ctx->t,ADD_VALUES);
244:   PetscFree(T);
245:   KSPSetUp(pep->refineksp);
246:   return(0);
247: #endif
248: }

252: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
253: {
254: #if defined(PETSC_MISSING_LAPACK_GETRS)
256:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
257: #else
258:   PetscErrorCode     ierr;
259:   PetscScalar        *t0;
260:   PetscBLASInt       k_,one=1,info,lda_;
261:   PetscInt           i,lda=nmat*k;
262:   Mat                M;
263:   FSubctx            *ctx;
264:   KSPConvergedReason reason;

267:   KSPGetOperators(ksp,&M,NULL);
268:   MatShellGetContext(M,&ctx);
269:   PetscCalloc1(k,&t0);
270:   PetscBLASIntCast(lda,&lda_);
271:   PetscBLASIntCast(k,&k_);
272:   for (i=0;i<k;i++) t0[i] = Rh[i];
273:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
274:   BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
275:   KSPSolve(ksp,Rv,dVi);
276:   KSPGetConvergedReason(ksp,&reason);
277:   if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
278:   VecConjugate(dVi);
279:   BVDotVec(ctx->M3,dVi,dHi);
280:   VecConjugate(dVi);
281:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
282:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
283:   PetscFree(t0);
284:   return(0);
285: #endif
286: }

290: /*
291:    Computes the residual P(H,V*S)*e_j for the polynomial
292: */
293: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
294: {
296:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
297:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
298:   PetscInt       i,ii,jj,lda;
299:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
300:   Mat            M0;
301:   Vec            w;

304:   PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
305:   lda = k*nmat;
306:   PetscBLASIntCast(k,&k_);
307:   PetscBLASIntCast(lds,&lds_);
308:   PetscBLASIntCast(lda,&lda_);
309:   PetscBLASIntCast(nmat,&nmat_);
310:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
311:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
312:   BVSetActiveColumns(W,0,nmat);
313:   BVMult(W,1.0,0.0,V,M0);
314:   MatDestroy(&M0);

316:   BVGetColumn(W,0,&w);
317:   MatMult(A[0],w,Rv);
318:   BVRestoreColumn(W,0,&w);
319:   for (i=1;i<nmat;i++) {
320:     BVGetColumn(W,i,&w);
321:     MatMult(A[i],w,t);
322:     BVRestoreColumn(W,i,&w);
323:     VecAXPY(Rv,1.0,t);
324:   }
325:   /* Update right-hand side */
326:   if (j) {
327:     PetscBLASIntCast(ldh,&ldh_);
328:     PetscMemzero(Z,k*k*sizeof(PetscScalar));
329:     PetscMemzero(DS0,k*k*sizeof(PetscScalar));
330:     PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
331:     /* Update DfH */
332:     for (i=1;i<nmat;i++) {
333:       if (i>1) {
334:         beta = -g[i-1];
335:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
336:         tt += -b[i-1];
337:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
338:         tt = b[i-1];
339:         beta = 1.0/a[i-1];
340:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
341:         F = DS0; DS0 = DS1; DS1 = F;
342:       } else {
343:         PetscMemzero(DS1,k*k*sizeof(PetscScalar));
344:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
345:       }
346:       for (jj=j;jj<k;jj++) {
347:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
348:       }
349:     }
350:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
351:     /* Update right-hand side */
352:     PetscBLASIntCast(2*k,&k2_);
353:     PetscBLASIntCast(j,&j_);
354:     PetscBLASIntCast(k+rds,&krds_);
355:     c0 = DS0;
356:     PetscMemzero(Rh,k*sizeof(PetscScalar));
357:     for (i=0;i<nmat;i++) {
358:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
359:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
360:       BVMultVec(V,1.0,0.0,t,h);
361:       BVSetActiveColumns(dV,0,rds);
362:       BVMultVec(dV,1.0,1.0,t,h+k);
363:       BVGetColumn(W,i,&w);
364:       MatMult(A[i],t,w);
365:       BVRestoreColumn(W,i,&w);
366:       if (i>0 && i<nmat-1) {
367:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
368:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
369:       }
370:     }

372:     for (i=0;i<nmat;i++) h[i] = -1.0;
373:     BVMultVec(W,1.0,1.0,Rv,h);
374:   }
375:   PetscFree4(h,DS0,DS1,Z);
376:   return(0);
377: }

381: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
382: {
383:   PetscErrorCode     ierr;
384:   PetscInt           i,j,incf,incc;
385:   PetscScalar        *y,*g,*xx2,*ww,y2,*dd;
386:   Vec                v,t,xx1;
387:   BV                 WW,T;
388:   KSPConvergedReason reason;

391:   PetscMalloc3(sz,&y,sz,&g,k,&xx2);
392:   if (trans) {
393:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
394:   } else {
395:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
396:   }
397:   xx1 = vw;
398:   VecCopy(x1,xx1);
399:   PetscMemcpy(xx2,x2,sz*sizeof(PetscScalar));
400:   PetscMemzero(sol2,k*sizeof(PetscScalar));
401:   for (i=sz-1;i>=0;i--) {
402:     BVGetColumn(WW,i,&v);
403:     VecConjugate(v);
404:     VecDot(xx1,v,y+i);
405:     VecConjugate(v);
406:     BVRestoreColumn(WW,i,&v);
407:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
408:     y[i] = -(y[i]-xx2[i])/dd[i];
409:     BVGetColumn(T,i,&t);
410:     VecAXPY(xx1,-y[i],t);
411:     BVRestoreColumn(T,i,&t);
412:     for(j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
413:     g[i] = xx2[i];
414:   }
415:   if (trans) {
416:     KSPSolveTranspose(ksp,xx1,sol1);
417:     KSPGetConvergedReason(ksp,&reason);
418:     if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
419:   } else {
420:     KSPSolve(ksp,xx1,sol1);
421:     KSPGetConvergedReason(ksp,&reason);
422:     if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);
423:   }
424:   if (trans) {
425:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
426:   } else {
427:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
428:   }
429:   for (i=0;i<sz;i++) {
430:     BVGetColumn(T,i,&t);
431:     VecConjugate(t);
432:     VecDot(sol1,t,&y2);
433:     VecConjugate(t);
434:     BVRestoreColumn(T,i,&t);
435:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
436:     y2 = (g[i]-y2)/dd[i];
437:     BVGetColumn(WW,i,&v);
438:     VecAXPY(sol1,-y2,v);
439:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
440:     sol2[i] = y[i]+y2;
441:     BVRestoreColumn(WW,i,&v);
442:   }
443:   PetscFree3(y,g,xx2);
444:   return(0);
445: }

449: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx)
450: {
452:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
453:   Mat            M1=matctx->M1,*A,*At,Mk;
454:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
455:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
456:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
457:   PetscBLASInt   lds_,lda_,k_;
458:   MatStructure   str;
459:   PetscBool      flg;
460:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
461:   Vec            vc,vc2;

464:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
465:   STGetMatStructure(pep->st,&str);
466:   STGetTransform(pep->st,&flg);
467:   if (flg) {
468:     PetscMalloc1(pep->nmat,&At);
469:     for (i=0;i<pep->nmat;i++) {
470:       STGetTOperators(pep->st,i,&At[i]);
471:     }
472:   } else At = pep->A;
473:   if (matctx->subc) A = matctx->A;
474:   else A = At;
475:   /* Form the explicit system matrix */
476:   DHii = T12;
477:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
478:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
479:   for (l=2;l<nmat;l++) {
480:     for (j=0;j<k;j++) {
481:       for (i=0;i<k;i++) {
482:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
483:       }
484:     }
485:   }

487:   /* T11 */
488:   if (!matctx->compM1) {
489:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
490:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
491:     for (j=1;j<nmat;j++) {
492:       MatAXPY(M1,Ts[j],A[j],str);
493:     }
494:   }

496:   /* T22 */
497:   PetscBLASIntCast(lds,&lds_);
498:   PetscBLASIntCast(k,&k_);
499:   PetscBLASIntCast(lda,&lda_);
500:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
501:   for (i=1;i<deg;i++) {
502:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
503:     s = (i==1)?0.0:1.0;
504:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
505:   }

507:   /* T12 */
508:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
509:   for (i=1;i<nmat;i++) {
510:     MatDenseGetArray(Mk,&array);
511:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
512:     MatDenseRestoreArray(Mk,&array);
513:     BVSetActiveColumns(W,0,k);
514:     BVMult(W,1.0,0.0,V,Mk);
515:     if (i==1) {
516:       BVMatMult(W,A[i],M2);
517:     } else {
518:       BVMatMult(W,A[i],M3); /* using M3 as work space */
519:       BVMult(M2,1.0,1.0,M3,NULL);
520:     }
521:   }

523:   /* T21 */
524:   MatDenseGetArray(Mk,&array);
525:   for (i=1;i<deg;i++) {
526:     s = (i==1)?0.0:1.0;
527:     ss = PetscConj(fh[i]);
528:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
529:   }
530:   MatDenseRestoreArray(Mk,&array);
531:   BVSetActiveColumns(M3,0,k);
532:   BVMult(M3,1.0,0.0,V,Mk);
533:   for (i=0;i<k;i++) {
534:     BVGetColumn(M3,i,&vc);
535:     VecConjugate(vc);
536:     BVRestoreColumn(M3,i,&vc);
537:   }

539:   KSPSetOperators(ksp,M1,M1);
540:   KSPSetUp(ksp);
541:   MatDestroy(&Mk);

543:   /* Set up for BEMW */
544:   for (i=0;i<k;i++) {
545:     BVGetColumn(M2,i,&vc);
546:     BVGetColumn(W,i,&vc2);
547:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
548:     BVRestoreColumn(M2,i,&vc);
549:     BVGetColumn(M3,i,&vc);
550:     VecConjugate(vc);
551:     VecDot(vc2,vc,&d[i]);
552:     VecConjugate(vc);
553:     BVRestoreColumn(M3,i,&vc);
554:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
555:     d[i] = M4[i+i*k]-d[i];
556:     BVRestoreColumn(W,i,&vc2);

558:     BVGetColumn(M3,i,&vc);
559:     BVGetColumn(Wt,i,&vc2);
560:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
561:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
562:     BVRestoreColumn(M3,i,&vc);
563:     BVGetColumn(M2,i,&vc);
564:     VecConjugate(vc2);
565:     VecDot(vc,vc2,&dt[i]);
566:     VecConjugate(vc2);
567:     BVRestoreColumn(M2,i,&vc);
568:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
569:     dt[i] = M4[i+i*k]-dt[i];
570:     BVRestoreColumn(Wt,i,&vc2);
571:   }

573:   if (flg) {
574:     PetscFree(At);
575:   }
576:   PetscFree3(T12,Tr,Ts);
577:   return(0);
578: }

582: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W)
583: {
584:   PetscErrorCode    ierr;
585:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
586:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
587:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
588:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
589:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
590:   PetscBLASInt      lds_,lda_,k_;
591:   const PetscInt    *idxmc;
592:   const PetscScalar *valsc,*carray;
593:   MatStructure      str;
594:   Vec               vc,vc0;
595:   PetscBool         flg;

598:   PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
599:   STGetMatStructure(pep->st,&str);
600:   KSPGetOperators(ksp,&M,NULL);
601:   MatGetOwnershipRange(E[1],&n1,&m1);
602:   MatGetOwnershipRange(E[0],&n0,&m0);
603:   MatGetOwnershipRange(M,&n,NULL);
604:   PetscMalloc1(nmat,&ts);
605:   STGetTransform(pep->st,&flg);
606:   if (flg) {
607:     PetscMalloc1(pep->nmat,&At);
608:     for (i=0;i<pep->nmat;i++) {
609:       STGetTOperators(pep->st,i,&At[i]);
610:     }
611:   } else At = pep->A;
612:   if (matctx->subc) A = matctx->A;
613:   else A = At;
614:   /* Form the explicit system matrix */
615:   DHii = T12;
616:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
617:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
618:   for (d=2;d<nmat;d++) {
619:     for (j=0;j<k;j++) {
620:       for (i=0;i<k;i++) {
621:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
622:       }
623:     }
624:   }

626:   /* T11 */
627:   if (!matctx->compM1) {
628:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
629:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
630:     for (j=1;j<nmat;j++) {
631:       MatAXPY(E[0],Ts[j],A[j],str);
632:     }
633:   }
634:   for (i=n0;i<m0;i++) {
635:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
636:     idx = n+i-n0;
637:     for (j=0;j<ncols;j++) {
638:       idxg[j] = matctx->map0[idxmc[j]];
639:     }
640:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
641:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
642:   }

644:   /* T22 */
645:   PetscBLASIntCast(lds,&lds_);
646:   PetscBLASIntCast(k,&k_);
647:   PetscBLASIntCast(lda,&lda_);
648:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
649:   for (i=1;i<deg;i++) {
650:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
651:     s = (i==1)?0.0:1.0;
652:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
653:   }
654:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
655:   for (i=0;i<m1-n1;i++) {
656:     idx = n+m0-n0+i;
657:     for (j=0;j<k;j++) {
658:       Tr[j] = T22[n1+i+j*k];
659:     }
660:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
661:   }

663:   /* T21 */
664:   for (i=1;i<deg;i++) {
665:     s = (i==1)?0.0:1.0;
666:     ss = PetscConj(fh[i]);
667:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
668:   }
669:   BVSetActiveColumns(W,0,k);
670:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
671:   BVMult(W,1.0,0.0,V,Mk);
672:   for (i=0;i<k;i++) {
673:     BVGetColumn(W,i,&vc);
674:     VecConjugate(vc);
675:     VecGetArrayRead(vc,&carray);
676:     idx = matctx->map1[i];
677:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
678:     VecRestoreArrayRead(vc,&carray);
679:     BVRestoreColumn(W,i,&vc);
680:   }

682:   /* T12 */
683:   for (i=1;i<nmat;i++) {
684:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
685:     for (j=0;j<k;j++) {
686:       PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
687:     }
688:   }
689:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
690:   for (i=0;i<nmat;i++) ts[i] = 1.0;
691:   for (j=0;j<k;j++) {
692:     MatDenseGetArray(Md,&array);
693:     PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
694:     MatDenseRestoreArray(Md,&array);
695:     BVSetActiveColumns(W,0,nmat-1);
696:     BVMult(W,1.0,0.0,V,Md);
697:     for (i=nmat-1;i>0;i--) {
698:       BVGetColumn(W,i-1,&vc0);
699:       BVGetColumn(W,i,&vc);
700:       MatMult(A[i],vc0,vc);
701:       BVRestoreColumn(W,i-1,&vc0);
702:       BVRestoreColumn(W,i,&vc);
703:     }
704:     BVSetActiveColumns(W,1,nmat);
705:     BVGetColumn(W,0,&vc0);
706:     BVMultVec(W,1.0,0.0,vc0,ts);
707:     VecGetArrayRead(vc0,&carray);
708:     idx = matctx->map1[j];
709:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
710:     VecRestoreArrayRead(vc0,&carray);
711:     BVRestoreColumn(W,0,&vc0);
712:   }
713:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
714:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
715:   KSPSetOperators(ksp,M,M);
716:   KSPSetUp(ksp);
717:   PetscFree(ts);
718:   MatDestroy(&Mk);
719:   MatDestroy(&Md);
720:   if (flg) {
721:     PetscFree(At);
722:   }
723:   PetscFree5(T22,T21,T12,Tr,Ts);
724:   return(0);
725: }

729: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
730: {
731:   PetscErrorCode     ierr;
732:   PetscInt           n0,m0,n1,m1,i;
733:   PetscScalar        *arrayV;
734:   const PetscScalar  *array;
735:   KSPConvergedReason reason;

738:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
739:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);

741:   /* Right side */
742:   VecGetArrayRead(Rv,&array);
743:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
744:   VecRestoreArrayRead(Rv,&array);
745:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
746:   VecAssemblyBegin(matctx->tN);
747:   VecAssemblyEnd(matctx->tN);

749:   /* Solve */
750:   KSPSolve(ksp,matctx->tN,matctx->ttN);
751:   KSPGetConvergedReason(ksp,&reason);
752:   if (reason<0) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSP did not converge (reason=%s)",KSPConvergedReasons[reason]);

754:   /* Retrieve solution */
755:   VecGetArray(dVi,&arrayV);
756:   VecGetArrayRead(matctx->ttN,&array);
757:   PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
758:   VecRestoreArray(dVi,&arrayV);
759:   if (!matctx->subc) {
760:     VecGetArray(matctx->t1,&arrayV);
761:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
762:     VecRestoreArray(matctx->t1,&arrayV);
763:     VecRestoreArrayRead(matctx->ttN,&array);
764:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
765:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
766:     VecGetArrayRead(matctx->vseq,&array);
767:     for (i=0;i<k;i++) dHi[i] = array[i];
768:     VecRestoreArrayRead(matctx->vseq,&array);
769:   }
770:   return(0);
771: }

775: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W)
776: {
777:   PetscErrorCode    ierr;
778:   PetscInt          j,m,lda=pep->nmat*k,n0,m0,idx;
779:   PetscMPIInt       root,len;
780:   PetscScalar       *array2,h;
781:   const PetscScalar *array;
782:   Vec               R,Vi;
783:   FSubctx           *ctx;
784:   Mat               M;

787:   if (!matctx || !matctx->subc) {
788:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
789:     h   = H[i+i*ldh];
790:     idx = i;
791:     R   = Rv;
792:     Vi  = dVi;
793:     switch (pep->scheme) {
794:     case PEP_REFINE_SCHEME_EXPLICIT:
795:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
796:       matctx->compM1 = PETSC_FALSE;
797:       break;
798:     case PEP_REFINE_SCHEME_MBE:
799:       NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
800:       matctx->compM1 = PETSC_FALSE;
801:       break;
802:     case PEP_REFINE_SCHEME_SCHUR:
803:       KSPGetOperators(ksp,&M,NULL);
804:       MatShellGetContext(M,&ctx);
805:       NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
806:       ctx->compM1 = PETSC_FALSE;
807:       break;
808:     }
809:   } else {
810:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
811:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
812:       h = H[idx+idx*ldh];
813:       matctx->idx = idx;
814:       switch (pep->scheme) {
815:       case PEP_REFINE_SCHEME_EXPLICIT:
816:         NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
817:         matctx->compM1 = PETSC_FALSE;
818:         break;
819:       case PEP_REFINE_SCHEME_MBE:
820:         NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
821:         matctx->compM1 = PETSC_FALSE;
822:         break;
823:       case PEP_REFINE_SCHEME_SCHUR:
824:         break;
825:       }
826:     } else idx = matctx->idx;
827:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
828:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
829:     VecGetArrayRead(matctx->tg,&array);
830:     VecPlaceArray(matctx->t,array);
831:     VecCopy(matctx->t,matctx->Rv);
832:     VecResetArray(matctx->t);
833:     VecRestoreArrayRead(matctx->tg,&array);
834:     R  = matctx->Rv;
835:     Vi = matctx->Vi;
836:   }
837:   if (idx==i && idx<k) {
838:     switch (pep->scheme) {
839:       case PEP_REFINE_SCHEME_EXPLICIT:
840:         NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
841:         break;
842:       case PEP_REFINE_SCHEME_MBE:
843:         NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
844:         break;
845:       case PEP_REFINE_SCHEME_SCHUR:
846:         NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
847:         break;
848:     }
849:   }
850:   if (matctx && matctx->subc) {
851:     VecGetLocalSize(Vi,&m);
852:     VecGetArrayRead(Vi,&array);
853:     VecGetArray(matctx->tg,&array2);
854:     PetscMemcpy(array2,array,m*sizeof(PetscScalar));
855:     VecRestoreArray(matctx->tg,&array2);
856:     VecRestoreArrayRead(Vi,&array);
857:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
858:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
859:     switch (pep->scheme) {
860:     case PEP_REFINE_SCHEME_EXPLICIT:
861:       MatGetOwnershipRange(matctx->E[0],&n0,&m0);
862:       VecGetArrayRead(matctx->ttN,&array);
863:       VecPlaceArray(matctx->tp,array+m0-n0);
864:       VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
865:       VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
866:       VecResetArray(matctx->tp);
867:       VecRestoreArrayRead(matctx->ttN,&array);
868:       VecGetArrayRead(matctx->tpg,&array);
869:       for (j=0;j<k;j++) dHi[j] = array[j];
870:       VecRestoreArrayRead(matctx->tpg,&array);
871:       break;
872:      case PEP_REFINE_SCHEME_MBE:
873:       root = 0;
874:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
875:       PetscMPIIntCast(k,&len);
876:       MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
877:       break;
878:     case PEP_REFINE_SCHEME_SCHUR:
879:       break;
880:     }
881:   }
882:   return(0);
883: }

887: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,MatExplicitCtx *matctx)
888: {
890:   PetscInt       i,nmat=pep->nmat,lda=nmat*k;
891:   PetscScalar    *fh,*Rh,*DfH;
892:   PetscReal      norm;
893:   BV             W;
894:   Vec            Rv,t,dvi;
895:   FSubctx        *ctx;
896:   Mat            M,*At;
897:   PetscBool      flg,lindep;

900:   PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
901:   *rds = 0;
902:   BVCreateVec(pep->V,&Rv);
903:   switch (pep->scheme) {
904:   case PEP_REFINE_SCHEME_EXPLICIT:
905:     BVCreateVec(pep->V,&t);
906:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
907:     PetscMalloc1(nmat,&fh);
908:     break;
909:   case PEP_REFINE_SCHEME_MBE:
910:     if (matctx->subc) {
911:       BVCreateVec(pep->V,&t);
912:       BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
913:     } else {
914:       W = matctx->W;
915:       PetscObjectReference((PetscObject)W);
916:       t = matctx->t;
917:       PetscObjectReference((PetscObject)t);
918:     }
919:     BVScale(matctx->W,0.0);
920:     BVScale(matctx->Wt,0.0);
921:     BVScale(matctx->M2,0.0);
922:     BVScale(matctx->M3,0.0);
923:     PetscMalloc1(nmat,&fh);
924:     break;
925:   case PEP_REFINE_SCHEME_SCHUR:
926:     KSPGetOperators(ksp,&M,NULL);
927:     MatShellGetContext(M,&ctx);
928:     BVCreateVec(pep->V,&t);
929:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
930:     fh = ctx->fih;
931:     break;
932:   }
933:   PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
934:   PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
935:   STGetTransform(pep->st,&flg);
936:   if (flg) {
937:     PetscMalloc1(pep->nmat,&At);
938:     for (i=0;i<pep->nmat;i++) {
939:       STGetTOperators(pep->st,i,&At[i]);
940:     }
941:   } else At = pep->A;

943:   /* Main loop for computing the ith columns of dX and dS */
944:   for (i=0;i<k;i++) {
945:     /* Compute and update i-th column of the right hand side */
946:     PetscMemzero(Rh,k*sizeof(PetscScalar));
947:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);

949:     /* Update and solve system */
950:     BVGetColumn(dV,i,&dvi);
951:     NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
952:     /* Orthogonalize computed solution */
953:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
954:     BVRestoreColumn(dV,i,&dvi);
955:     if (!lindep) {
956:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
957:       if (!lindep) {
958:         dVS[k+i+i*2*k] = norm;
959:         BVScaleColumn(dV,i,1.0/norm);
960:         (*rds)++;
961:       }
962:     }
963:   }
964:   BVSetActiveColumns(dV,0,*rds);
965:   VecDestroy(&t);
966:   VecDestroy(&Rv);
967:   BVDestroy(&W);
968:   if (flg) {
969:     PetscFree(At);
970:   }
971:   PetscFree2(DfH,Rh);
972:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
973:   return(0);
974: }

978: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscInt *prs)
979: {
980: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
982:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
983: #else
985:   PetscInt       i,j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,rs=*prs,ldg;
986:   PetscScalar    *T,*G,*tau,*array,sone=1.0,zero=0.0,*work;
987:   PetscBLASInt   rs_,lds_,k_,ldh_,info,ldg_,lda_;
988:   Mat            M0;

991:   PetscMalloc4(rs*k,&T,k,&tau,k,&work,deg*k*k,&G);
992:   PetscBLASIntCast(lds,&lds_);
993:   PetscBLASIntCast(lda,&lda_);
994:   PetscBLASIntCast(k,&k_);
995:   if (rs>k) { /* Truncate S to have k columns*/
996:     for (j=0;j<k;j++) {
997:       PetscMemcpy(T+j*rs,S+j*lds,rs*sizeof(PetscScalar));
998:     }
999:     PetscBLASIntCast(rs,&rs_);
1000:     PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&rs_,&k_,T,&rs_,tau,work,&k_,&info));
1001:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1002:     /* Copy triangular matrix in S */
1003:     PetscMemzero(S,lds*k*sizeof(PetscScalar));
1004:     for (j=0;j<k;j++) for (i=0;i<=j;i++) S[j*lds+i] = T[j*rs+i];
1005:     PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&rs_,&k_,&k_,T,&rs_,tau,work,&k_,&info));
1006:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
1007:     MatCreateSeqDense(PETSC_COMM_SELF,rs,k,NULL,&M0);
1008:     MatDenseGetArray(M0,&array);
1009:     for (j=0;j<k;j++) {
1010:       PetscMemcpy(array+j*rs,T+j*rs,rs*sizeof(PetscScalar));
1011:     }
1012:     MatDenseRestoreArray(M0,&array);
1013:     BVSetActiveColumns(pep->V,0,rs);
1014:     BVMultInPlace(pep->V,M0,0,k);
1015:     BVSetActiveColumns(pep->V,0,k);
1016:     MatDestroy(&M0);
1017:     *prs = rs = k;
1018:   }
1019:   /* Form auxiliary matrix for the orthogonalization step */
1020:   ldg = deg*k;
1021:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1022:   PetscBLASIntCast(ldg,&ldg_);
1023:   PetscBLASIntCast(ldh,&ldh_);
1024:   for (j=0;j<deg;j++) {
1025:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
1026:   }
1027:   /* Orthogonalize and update S */
1028:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
1029:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1030:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

1032:   /* Update H */
1033:   PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
1034:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
1035:   PetscFree4(T,tau,work,G);
1036:   return(0);
1037: #endif
1038: }

1042: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
1043: {
1044: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
1046:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
1047: #else
1049:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
1050:   PetscScalar    *tau,*array,*work;
1051:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
1052:   Mat            M0;

1055:   PetscMalloc2(k,&tau,k,&work);
1056:   PetscBLASIntCast(lds,&lds_);
1057:   PetscBLASIntCast(lda,&lda_);
1058:   PetscBLASIntCast(ldh,&ldh_);
1059:   PetscBLASIntCast(k,&k_);
1060:   PetscBLASIntCast(2*k,&k2_);
1061:   PetscBLASIntCast((k+rds),&kdrs_);
1062:   /* Update H */
1063:   for (j=0;j<k;j++) {
1064:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
1065:   }
1066:   /* Update V */
1067:   for (j=0;j<k;j++) {
1068:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
1069:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
1070:   }
1071:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
1072:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
1073:   /* Copy triangular matrix in S */
1074:   for (j=0;j<k;j++) {
1075:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
1076:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
1077:   }
1078:   PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
1079:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
1080:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1081:   MatDenseGetArray(M0,&array);
1082:   for (j=0;j<k;j++) {
1083:     PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
1084:   }
1085:   MatDenseRestoreArray(M0,&array);
1086:   BVMultInPlace(pep->V,M0,0,k);
1087:   if (rds) {
1088:     MatDenseGetArray(M0,&array);
1089:     for (j=0;j<k;j++) {
1090:       PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
1091:     }
1092:     MatDenseRestoreArray(M0,&array);
1093:     BVMultInPlace(dV,M0,0,k);
1094:     BVMult(pep->V,1.0,1.0,dV,NULL);
1095:   }
1096:   MatDestroy(&M0);
1097:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds,&k);
1098:   PetscFree2(tau,work);
1099:   return(0);
1100: #endif
1101: }

1105: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,MatExplicitCtx *matctx,PetscBool ini)
1106: {
1107:   PetscErrorCode    ierr;
1108:   FSubctx           *ctx;
1109:   PetscScalar       t,*coef;
1110:   const PetscScalar *array;
1111:   MatStructure      str;
1112:   PetscInt          j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1113:   MPI_Comm          comm;
1114:   PetscMPIInt       np;
1115:   const PetscInt    *rgs0,*rgs1;
1116:   Mat               B,C,*E,*A,*At;
1117:   IS                is1,is2;
1118:   Vec               v;
1119:   PetscBool         flg;
1120:   Mat               M,P;

1123:   PetscMalloc1(nmat,&coef);
1124:   STGetTransform(pep->st,&flg);
1125:   if (flg) {
1126:     PetscMalloc1(pep->nmat,&At);
1127:     for (i=0;i<pep->nmat;i++) {
1128:       STGetTOperators(pep->st,i,&At[i]);
1129:     }
1130:   } else At = pep->A;
1131:   switch (pep->scheme) {
1132:   case PEP_REFINE_SCHEME_EXPLICIT:
1133:     if (ini) {
1134:       if (matctx->subc) {
1135:         A = matctx->A;
1136:         comm = PetscSubcommChild(matctx->subc);
1137:       } else {
1138:         A = At;
1139:         PetscObjectGetComm((PetscObject)pep,&comm);
1140:       }
1141:       E = matctx->E;
1142:       STGetMatStructure(pep->st,&str);
1143:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1144:       j = (matctx->subc)?matctx->subc->color:0;
1145:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1146:       for (j=1;j<nmat;j++) {
1147:         MatAXPY(E[0],coef[j],A[j],str);
1148:       }
1149:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1150:       MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1151:       MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1152:       MatGetOwnershipRange(E[0],&n0,&m0);
1153:       MatGetOwnershipRange(E[1],&n1,&m1);
1154:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1155:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1156:       /* T12 and T21 are computed from V and V*, so,
1157:          they must have the same column and row ranges */
1158:       if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1159:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1160:       MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1161:       MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1162:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1163:       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1164:       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1165:       SlepcMatTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1166:       MatDestroy(&B);
1167:       MatDestroy(&C);
1168:       matctx->compM1 = PETSC_TRUE;
1169:       MatGetSize(E[0],NULL,&N0);
1170:       MatGetSize(E[1],NULL,&N1);
1171:       MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1172:       MatGetOwnershipRanges(E[0],&rgs0);
1173:       MatGetOwnershipRanges(E[1],&rgs1);
1174:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1175:       /* Create column (and row) mapping */
1176:       for (p=0;p<np;p++) {
1177:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1178:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1179:       }
1180:       MatCreateVecs(M,NULL,&matctx->tN);
1181:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1182:       VecDuplicate(matctx->tN,&matctx->ttN);
1183:       if (matctx->subc) {
1184:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1185:         count = np*k;
1186:         PetscMalloc2(count,&idx1,count,&idx2);
1187:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1188:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
1189:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1190:         for (si=0;si<matctx->subc->n;si++) {
1191:           if (matctx->subc->color==si) {
1192:             j=0;
1193:             if (matctx->subc->color==si) {
1194:               for (p=0;p<np;p++) {
1195:                 for (i=n1;i<m1;i++) {
1196:                   idx1[j] = l0+i-n1;
1197:                   idx2[j++] =p*k+i;
1198:                 }
1199:               }
1200:             }
1201:             count = np*(m1-n1);
1202:           } else count =0;
1203:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1204:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1205:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1206:           ISDestroy(&is1);
1207:           ISDestroy(&is2);
1208:         }
1209:         PetscFree2(idx1,idx2);
1210:       } else {
1211:         VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1212:       }
1213:       P = M;
1214:     } else {
1215:       if (matctx->subc) {
1216:         /* Scatter vectors pep->V */
1217:         for (i=0;i<k;i++) {
1218:           BVGetColumn(pep->V,i,&v);
1219:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1220:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1221:           BVRestoreColumn(pep->V,i,&v);
1222:           VecGetArrayRead(matctx->tg,&array);
1223:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
1224:           BVInsertVec(matctx->V,i,matctx->t);
1225:           VecResetArray(matctx->t);
1226:           VecRestoreArrayRead(matctx->tg,&array);
1227:         }
1228:       }
1229:     }
1230:     break;
1231:   case PEP_REFINE_SCHEME_MBE:
1232:     if (ini) {
1233:       if (matctx->subc) {
1234:         A = matctx->A;
1235:         comm = PetscSubcommChild(matctx->subc);
1236:       } else {
1237:         matctx->V = pep->V;
1238:         A = At;
1239:         PetscObjectGetComm((PetscObject)pep,&comm);
1240:         MatCreateVecs(pep->A[0],&matctx->t,NULL);
1241:       }
1242:       STGetMatStructure(pep->st,&str);
1243:       MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1244:       j = (matctx->subc)?matctx->subc->color:0;
1245:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1246:       for (j=1;j<nmat;j++) {
1247:         MatAXPY(matctx->M1,coef[j],A[j],str);
1248:       }
1249:       BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1250:       BVDuplicateResize(matctx->V,k,&matctx->M2);
1251:       BVDuplicate(matctx->M2,&matctx->M3);
1252:       BVDuplicate(matctx->M2,&matctx->Wt);
1253:       PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1254:       matctx->compM1 = PETSC_TRUE;
1255:       M = matctx->M1;
1256:       P = M;
1257:     }
1258:     break;
1259:   case PEP_REFINE_SCHEME_SCHUR:
1260:     if (ini) {
1261:       PetscObjectGetComm((PetscObject)pep,&comm);
1262:       MatGetSize(At[0],&m0,&n0);
1263:       PetscMalloc1(1,&ctx);
1264:       STGetMatStructure(pep->st,&str);
1265:       /* Create a shell matrix to solve the linear system */
1266:       ctx->A = At;
1267:       ctx->V = pep->V;
1268:       ctx->k = k; ctx->nmat = nmat;
1269:       PetscMalloc4(k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1270:       PetscMemzero(ctx->M4,k*k*sizeof(PetscScalar));
1271:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1272:       MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatFSMult);
1273:       BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1274:       BVDuplicateResize(ctx->V,k,&ctx->M2);
1275:       BVDuplicate(ctx->M2,&ctx->M3);
1276:       BVCreateVec(pep->V,&ctx->t);
1277:       MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1278:       PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1279:       for (j=1;j<nmat;j++) {
1280:         MatAXPY(ctx->M1,coef[j],At[j],str);
1281:       }
1282:       MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1283:       /* Compute a precond matrix for the system */
1284:       t = H[0];
1285:       PEPEvaluateBasis(pep,t,0,coef,NULL);
1286:       for (j=1;j<nmat;j++) {
1287:         MatAXPY(P,coef[j],At[j],str);
1288:       }
1289:       ctx->compM1 = PETSC_TRUE;
1290:     }
1291:     break;
1292:   }
1293:   if (ini) {
1294:     PEPRefineGetKSP(pep,&pep->refineksp);
1295:     KSPSetOperators(pep->refineksp,M,P);
1296:     KSPSetFromOptions(pep->refineksp);
1297:   }

1299:   if (!ini && matctx && matctx->subc) {
1300:      /* Scatter vectors pep->V */
1301:     for (i=0;i<k;i++) {
1302:       BVGetColumn(pep->V,i,&v);
1303:       VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1304:       VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1305:       BVRestoreColumn(pep->V,i,&v);
1306:       VecGetArrayRead(matctx->tg,&array);
1307:       VecPlaceArray(matctx->t,(const PetscScalar*)array);
1308:       BVInsertVec(matctx->V,i,matctx->t);
1309:       VecResetArray(matctx->t);
1310:       VecRestoreArrayRead(matctx->tg,&array);
1311:     }
1312:    }
1313:   PetscFree(coef);
1314:   if (flg) {
1315:     PetscFree(At);
1316:   }
1317:   return(0);
1318: }

1322: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
1323: {
1324:   PetscErrorCode    ierr;
1325:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1326:   IS                is1,is2;
1327:   BVType            type;
1328:   Vec               v;
1329:   const PetscScalar *array;
1330:   Mat               *A;
1331:   PetscBool         flg;

1334:   STGetTransform(pep->st,&flg);
1335:   if (flg) {
1336:     PetscMalloc1(pep->nmat,&A);
1337:     for (i=0;i<pep->nmat;i++) {
1338:       STGetTOperators(pep->st,i,&A[i]);
1339:     }
1340:   } else A = pep->A;

1342:   /* Duplicate pep matrices */
1343:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1344:   for (i=0;i<pep->nmat;i++) {
1345:     MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1346:   }

1348:   /* Create Scatter */
1349:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1350:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1351:   VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1352:   BVGetColumn(pep->V,0,&v);
1353:   VecGetOwnershipRange(v,&n0,&m0);
1354:   nloc0 = m0-n0;
1355:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1356:   j = 0;
1357:   for (si=0;si<matctx->subc->n;si++) {
1358:     for (i=n0;i<m0;i++) {
1359:       idx1[j]   = i;
1360:       idx2[j++] = i+pep->n*si;
1361:     }
1362:   }
1363:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1364:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1365:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1366:   ISDestroy(&is1);
1367:   ISDestroy(&is2);
1368:   for (si=0;si<matctx->subc->n;si++) {
1369:     j=0;
1370:     for (i=n0;i<m0;i++) {
1371:       idx1[j] = i;
1372:       idx2[j++] = i+pep->n*si;
1373:     }
1374:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1375:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1376:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1377:     ISDestroy(&is1);
1378:     ISDestroy(&is2);
1379:   }
1380:   BVRestoreColumn(pep->V,0,&v);
1381:   PetscFree2(idx1,idx2);

1383:   /* Duplicate pep->V vecs */
1384:   BVGetType(pep->V,&type);
1385:   BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1386:   BVSetType(matctx->V,type);
1387:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1388:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1389:     BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1390:   }
1391:   for (i=0;i<k;i++) {
1392:     BVGetColumn(pep->V,i,&v);
1393:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1394:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1395:     BVRestoreColumn(pep->V,i,&v);
1396:     VecGetArrayRead(matctx->tg,&array);
1397:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1398:     BVInsertVec(matctx->V,i,matctx->t);
1399:     VecResetArray(matctx->t);
1400:     VecRestoreArrayRead(matctx->tg,&array);
1401:   }

1403:   VecDuplicate(matctx->t,&matctx->Rv);
1404:   VecDuplicate(matctx->t,&matctx->Vi);
1405:   if (flg) {
1406:     PetscFree(A);
1407:   }
1408:   return(0);
1409: }

1413: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1414: {
1416:   PetscInt       i;

1419:   VecScatterDestroy(&matctx->scatter_sub);
1420:   for (i=0;i<matctx->subc->n;i++) {
1421:     VecScatterDestroy(&matctx->scatter_id[i]);
1422:   }
1423:   for (i=0;i<pep->nmat;i++) {
1424:     MatDestroy(&matctx->A[i]);
1425:   }
1426:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1427:     for (i=0;i<matctx->subc->n;i++) {
1428:       VecScatterDestroy(&matctx->scatterp_id[i]);
1429:     }
1430:     VecDestroy(&matctx->tp);
1431:     VecDestroy(&matctx->tpg);
1432:     BVDestroy(&matctx->W);
1433:   }
1434:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1435:   BVDestroy(&matctx->V);
1436:   VecDestroy(&matctx->t);
1437:   VecDestroy(&matctx->tg);
1438:   VecDestroy(&matctx->Rv);
1439:   VecDestroy(&matctx->Vi);
1440:   return(0);
1441: }

1445: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds,PetscInt *prs)
1446: {
1447: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI)
1449:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI - Lapack routine is unavailable");
1450: #else
1452:   PetscScalar    *H,*work,*dH,*fH,*dVS;
1453:   PetscInt       ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1454:   PetscBLASInt   k_,ld_,*p,info;
1455:   BV             dV;
1456:   PetscBool      sinvert,flg;
1457:   MatExplicitCtx *matctx=NULL;
1458:   Vec            v;
1459:   Mat            M,P;
1460:   FSubctx        *ctx;

1463:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1464:   if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1465:   /* the input tolerance is not being taken into account (by the moment) */
1466:   its = *maxits;
1467:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1468:   DSGetLeadingDimension(pep->ds,&ldh);
1469:   DSGetArray(pep->ds,DS_MAT_A,&H);
1470:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1471:   PetscMalloc1(2*k*k,&dVS);
1472:   STGetTransform(pep->st,&flg);
1473:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1474:     PetscBLASIntCast(k,&k_);
1475:     PetscBLASIntCast(ldh,&ld_);
1476:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1477:     if (sinvert) {
1478:       DSGetArray(pep->ds,DS_MAT_A,&H);
1479:       PetscMalloc1(k,&p);
1480:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1481:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1482:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1483:       pep->ops->backtransform = NULL;
1484:     }
1485:     if (sigma!=0.0) {
1486:       DSGetArray(pep->ds,DS_MAT_A,&H);
1487:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1488:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1489:       pep->ops->backtransform = NULL;
1490:     }
1491:   }
1492:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1493:     DSGetArray(pep->ds,DS_MAT_A,&H);
1494:     for (j=0;j<k;j++) {
1495:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1496:     }
1497:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1498:     if (!flg) {
1499:       /* Restore original values */
1500:       for (i=0;i<pep->nmat;i++){
1501:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1502:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1503:       }
1504:     }
1505:   }
1506:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1507:     for (i=0;i<k;i++) {
1508:       BVGetColumn(pep->V,i,&v);
1509:       VecPointwiseMult(v,v,pep->Dr);
1510:       BVRestoreColumn(pep->V,i,&v);
1511:     }
1512:   }
1513:   DSGetArray(pep->ds,DS_MAT_A,&H);

1515:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds,prs);
1516:   /* check if H is in Schur form */
1517:   for (i=0;i<k-1;i++) {
1518:     if (H[i+1+i*ldh]!=0.0) {
1519: #if !defined(PETSC_USES_COMPLEX)
1520:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1521: #else
1522:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1523: #endif
1524:     }
1525:   }
1526:   if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1527:   BVSetActiveColumns(pep->V,0,k);
1528:   BVDuplicateResize(pep->V,k,&dV);
1529:   PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1530:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1531:     PetscMalloc1(1,&matctx);
1532:     if (nsubc>1) { /* spliting in subcommunicators */
1533:       matctx->subc = pep->refinesubc;
1534:       NRefSubcommSetup(pep,k,matctx,nsubc);
1535:     } else matctx->subc=NULL;
1536:   }

1538:   /* Loop performing iterative refinements */
1539:   for (i=0;i<its;i++) {
1540:     /* Pre-compute the polynomial basis evaluated in H */
1541:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1542:     PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1543:     /* Solve the linear system */
1544:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1545:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1546:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1547:   }
1548:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1549:   if (!flg && sinvert) {
1550:     PetscFree(p);
1551:   }
1552:   PetscFree3(dH,fH,work);
1553:   PetscFree(dVS);
1554:   BVDestroy(&dV);
1555:   switch (pep->scheme) {
1556:   case PEP_REFINE_SCHEME_EXPLICIT:
1557:     for (i=0;i<2;i++) {
1558:       MatDestroy(&matctx->E[i]);
1559:     }
1560:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1561:     VecDestroy(&matctx->tN);
1562:     VecDestroy(&matctx->ttN);
1563:     VecDestroy(&matctx->t1);
1564:     if (nsubc>1) {
1565:       NRefSubcommDestroy(pep,matctx);
1566:     } else {
1567:       VecDestroy(&matctx->vseq);
1568:       VecScatterDestroy(&matctx->scatterctx);
1569:     }
1570:     PetscFree(matctx);
1571:     KSPGetOperators(pep->refineksp,&M,NULL);
1572:     MatDestroy(&M);
1573:     break;
1574:   case PEP_REFINE_SCHEME_MBE:
1575:     BVDestroy(&matctx->W);
1576:     BVDestroy(&matctx->Wt);
1577:     BVDestroy(&matctx->M2);
1578:     BVDestroy(&matctx->M3);
1579:     MatDestroy(&matctx->M1);
1580:     VecDestroy(&matctx->t);
1581:     PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1582:     if (nsubc>1) {
1583:       NRefSubcommDestroy(pep,matctx);
1584:     }
1585:     PetscFree(matctx);
1586:     break;
1587:   case PEP_REFINE_SCHEME_SCHUR:
1588:     KSPGetOperators(pep->refineksp,&M,&P);
1589:     MatShellGetContext(M,&ctx);
1590:     PetscFree4(ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1591:     MatDestroy(&ctx->M1);
1592:     BVDestroy(&ctx->M2);
1593:     BVDestroy(&ctx->M3);
1594:     BVDestroy(&ctx->W);
1595:     VecDestroy(&ctx->t);
1596:     PetscFree(ctx);
1597:     MatDestroy(&M);
1598:     MatDestroy(&P);
1599:     break;
1600:   }
1601:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1602:   return(0);
1603: #endif
1604: }