Actual source code: bddcprivate.c

petsc-3.11.0 2019-03-29
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>
  5:  #include <petscdmplex.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/sfimpl.h>
  8:  #include <petsc/private/dmpleximpl.h>
  9:  #include <petscdmda.h>

 /* Forward declaration of a file-local (static) helper, so that it can be
    called before the point in this file where it is defined. */
 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscScalar    *uwork,*data,*U, ds = 0.;
 19:   PetscReal      *sing;
 20:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 21:   PetscInt       ulw,i,nr,nc,n;

 25: #if defined(PETSC_MISSING_LAPACK_GESVD)
 26:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 27: #else
 28:   MatGetSize(A,&nr,&nc);
 29:   if (!nr || !nc) return(0);

 31:   /* workspace */
 32:   if (!work) {
 33:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 34:     PetscMalloc1(ulw,&uwork);
 35:   } else {
 36:     ulw   = lw;
 37:     uwork = work;
 38:   }
 39:   n = PetscMin(nr,nc);
 40:   if (!rwork) {
 41:     PetscMalloc1(n,&sing);
 42:   } else {
 43:     sing = rwork;
 44:   }

 46:   /* SVD */
 47:   PetscMalloc1(nr*nr,&U);
 48:   PetscBLASIntCast(nr,&bM);
 49:   PetscBLASIntCast(nc,&bN);
 50:   PetscBLASIntCast(ulw,&lwork);
 51:   MatDenseGetArray(A,&data);
 52:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54:   PetscFPTrapPop();
 55:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 56:   MatDenseRestoreArray(A,&data);
 57:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 58:   if (!rwork) {
 59:     PetscFree(sing);
 60:   }
 61:   if (!work) {
 62:     PetscFree(uwork);
 63:   }
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 67:     MatDenseGetArray(*B,&data);
 68:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 71:     MatDenseGetArray(*B,&data);
 72:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 73:   }
 74:   MatDenseRestoreArray(*B,&data);
 75:   PetscFree(U);
 76: #endif
 77: #else /* PETSC_USE_COMPLEX */
 79:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 80: #endif
 81:   return(0);
 82: }

 84: /* TODO REMOVE */
 /* Debug-only state, compiled in only when PRINT_GDET is defined: both values are
    embedded in the name of the "Gdet_l%d_r%d_cc%d.m" dump file written by
    PCBDDCComputeNedelecChangeEdge; inc is incremented after every dump. */
 85: #if defined(PRINT_GDET)
 86: static int inc = 0;
 87: static int lev = 0;
 88: #endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat            GEc;
121:     PetscScalar    *vals,v;

123:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125:     MatDenseGetArray(GEd,&vals);
126:     /* v    = PetscAbsScalar(vals[0]) */;
127:     v    = 1.;
128:     cvals[0] = vals[0]/v;
129:     cvals[1] = vals[1]/v;
130:     MatDenseRestoreArray(GEd,&vals);
131:     MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133:     {
134:       PetscViewer viewer;
135:       char filename[256];
136:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139:       PetscObjectSetName((PetscObject)GEc,"GEc");
140:       MatView(GEc,viewer);
141:       PetscObjectSetName((PetscObject)(*GKins),"GK");
142:       MatView(*GKins,viewer);
143:       PetscObjectSetName((PetscObject)GEd,"Gproj");
144:       MatView(GEd,viewer);
145:       PetscViewerDestroy(&viewer);
146:     }
147: #endif
148:     MatDestroy(&GEd);
149:     MatDestroy(&GEc);
150:   }

152:   return(0);
153: }

155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
158:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
159:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160:   Vec                    tvec;
161:   PetscSF                sfv;
162:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163:   MPI_Comm               comm;
164:   IS                     lned,primals,allprimals,nedfieldlocal;
165:   IS                     *eedges,*extrows,*extcols,*alleedges;
166:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167:   PetscScalar            *vals,*work;
168:   PetscReal              *rwork;
169:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
170:   PetscInt               ne,nv,Lv,order,n,field;
171:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
172:   PetscInt               i,j,extmem,cum,maxsize,nee;
173:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174:   PetscInt               *sfvleaves,*sfvroots;
175:   PetscInt               *corners,*cedges;
176:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178:   PetscInt               *emarks;
179: #endif
180:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181:   PetscErrorCode         ierr;

184:   /* If the discrete gradient is defined for a subset of dofs and global is true,
185:      it assumes G is given in global ordering for all the dofs.
186:      Otherwise, the ordering is global for the Nedelec field */
187:   order      = pcbddc->nedorder;
188:   conforming = pcbddc->conforming;
189:   field      = pcbddc->nedfield;
190:   global     = pcbddc->nedglobal;
191:   setprimal  = PETSC_FALSE;
192:   print      = PETSC_FALSE;
193:   singular   = PETSC_FALSE;

195:   /* Command line customization */
196:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200:   /* print debug info TODO: to be removed */
201:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202:   PetscOptionsEnd();

204:   /* Return if there are no edges in the decomposition and the problem is not singular */
205:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206:   ISLocalToGlobalMappingGetSize(al2g,&n);
207:   PetscObjectGetComm((PetscObject)pc,&comm);
208:   if (!singular) {
209:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210:     lrc[0] = PETSC_FALSE;
211:     for (i=0;i<n;i++) {
212:       if (PetscRealPart(vals[i]) > 2.) {
213:         lrc[0] = PETSC_TRUE;
214:         break;
215:       }
216:     }
217:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219:     if (!lrc[1]) return(0);
220:   }

222:   /* Get Nedelec field */
223:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
224:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
225:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
226:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
227:     ISGetLocalSize(nedfieldlocal,&ne);
228:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
229:     ne            = n;
230:     nedfieldlocal = NULL;
231:     global        = PETSC_TRUE;
232:   } else if (field == PETSC_DECIDE) {
233:     PetscInt rst,ren,*idx;

235:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
236:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
237:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
238:     for (i=rst;i<ren;i++) {
239:       PetscInt nc;

241:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
242:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
243:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
244:     }
245:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247:     PetscMalloc1(n,&idx);
248:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
249:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
250:   } else {
251:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
252:   }

254:   /* Sanity checks */
255:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
256:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
257:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);

259:   /* Just set primal dofs and return */
260:   if (setprimal) {
261:     IS       enedfieldlocal;
262:     PetscInt *eidxs;

264:     PetscMalloc1(ne,&eidxs);
265:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
266:     if (nedfieldlocal) {
267:       ISGetIndices(nedfieldlocal,&idxs);
268:       for (i=0,cum=0;i<ne;i++) {
269:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
270:           eidxs[cum++] = idxs[i];
271:         }
272:       }
273:       ISRestoreIndices(nedfieldlocal,&idxs);
274:     } else {
275:       for (i=0,cum=0;i<ne;i++) {
276:         if (PetscRealPart(vals[i]) > 2.) {
277:           eidxs[cum++] = i;
278:         }
279:       }
280:     }
281:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
282:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
283:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
284:     PetscFree(eidxs);
285:     ISDestroy(&nedfieldlocal);
286:     ISDestroy(&enedfieldlocal);
287:     return(0);
288:   }

290:   /* Compute some l2g maps */
291:   if (nedfieldlocal) {
292:     IS is;

294:     /* need to map from the local Nedelec field to local numbering */
295:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
296:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
297:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
298:     ISLocalToGlobalMappingCreateIS(is,&al2g);
299:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
300:     if (global) {
301:       PetscObjectReference((PetscObject)al2g);
302:       el2g = al2g;
303:     } else {
304:       IS gis;

306:       ISRenumber(is,NULL,NULL,&gis);
307:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
308:       ISDestroy(&gis);
309:     }
310:     ISDestroy(&is);
311:   } else {
312:     /* restore default */
313:     pcbddc->nedfield = -1;
314:     /* one ref for the destruction of al2g, one for el2g */
315:     PetscObjectReference((PetscObject)al2g);
316:     PetscObjectReference((PetscObject)al2g);
317:     el2g = al2g;
318:     fl2g = NULL;
319:   }

321:   /* Start communication to drop connections for interior edges (for cc analysis only) */
322:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
323:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
324:   if (nedfieldlocal) {
325:     ISGetIndices(nedfieldlocal,&idxs);
326:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
327:     ISRestoreIndices(nedfieldlocal,&idxs);
328:   } else {
329:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
330:   }
331:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
332:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

334:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
335:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
336:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
337:     if (global) {
338:       PetscInt rst;

340:       MatGetOwnershipRange(G,&rst,NULL);
341:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
342:         if (matis->sf_rootdata[i] < 2) {
343:           matis->sf_rootdata[cum++] = i + rst;
344:         }
345:       }
346:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
347:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
348:     } else {
349:       PetscInt *tbz;

351:       PetscMalloc1(ne,&tbz);
352:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354:       ISGetIndices(nedfieldlocal,&idxs);
355:       for (i=0,cum=0;i<ne;i++)
356:         if (matis->sf_leafdata[idxs[i]] == 1)
357:           tbz[cum++] = i;
358:       ISRestoreIndices(nedfieldlocal,&idxs);
359:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
360:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
361:       PetscFree(tbz);
362:     }
363:   } else { /* we need the entire G to infer the nullspace */
364:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
365:     G    = pcbddc->discretegradient;
366:   }

368:   /* Extract subdomain relevant rows of G */
369:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
370:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
371:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
372:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
373:   ISDestroy(&lned);
374:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
375:   MatDestroy(&lGall);
376:   MatISGetLocalMat(lGis,&lG);

378:   /* SF for nodal dofs communications */
379:   MatGetLocalSize(G,NULL,&Lv);
380:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
381:   PetscObjectReference((PetscObject)vl2g);
382:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
383:   PetscSFCreate(comm,&sfv);
384:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
385:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
386:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
387:   i    = singular ? 2 : 1;
388:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

390:   /* Destroy temporary G created in MATIS format and modified G */
391:   PetscObjectReference((PetscObject)lG);
392:   MatDestroy(&lGis);
393:   MatDestroy(&G);

395:   if (print) {
396:     PetscObjectSetName((PetscObject)lG,"initial_lG");
397:     MatView(lG,NULL);
398:   }

400:   /* Save lG for values insertion in change of basis */
401:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

403:   /* Analyze the edge-nodes connections (duplicate lG) */
404:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
405:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
406:   PetscBTCreate(nv,&btv);
407:   PetscBTCreate(ne,&bte);
408:   PetscBTCreate(ne,&btb);
409:   PetscBTCreate(ne,&btbd);
410:   PetscBTCreate(nv,&btvcand);
411:   /* need to import the boundary specification to ensure the
412:      proper detection of coarse edges' endpoints */
413:   if (pcbddc->DirichletBoundariesLocal) {
414:     IS is;

416:     if (fl2g) {
417:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
418:     } else {
419:       is = pcbddc->DirichletBoundariesLocal;
420:     }
421:     ISGetLocalSize(is,&cum);
422:     ISGetIndices(is,&idxs);
423:     for (i=0;i<cum;i++) {
424:       if (idxs[i] >= 0) {
425:         PetscBTSet(btb,idxs[i]);
426:         PetscBTSet(btbd,idxs[i]);
427:       }
428:     }
429:     ISRestoreIndices(is,&idxs);
430:     if (fl2g) {
431:       ISDestroy(&is);
432:     }
433:   }
434:   if (pcbddc->NeumannBoundariesLocal) {
435:     IS is;

437:     if (fl2g) {
438:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
439:     } else {
440:       is = pcbddc->NeumannBoundariesLocal;
441:     }
442:     ISGetLocalSize(is,&cum);
443:     ISGetIndices(is,&idxs);
444:     for (i=0;i<cum;i++) {
445:       if (idxs[i] >= 0) {
446:         PetscBTSet(btb,idxs[i]);
447:       }
448:     }
449:     ISRestoreIndices(is,&idxs);
450:     if (fl2g) {
451:       ISDestroy(&is);
452:     }
453:   }

455:   /* Count neighs per dof */
456:   ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
457:   ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);

459:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
460:      for proper detection of coarse edges' endpoints */
461:   PetscBTCreate(ne,&btee);
462:   for (i=0;i<ne;i++) {
463:     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
464:       PetscBTSet(btee,i);
465:     }
466:   }
467:   PetscMalloc1(ne,&marks);
468:   if (!conforming) {
469:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
470:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
471:   }
472:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
473:   MatSeqAIJGetArray(lGe,&vals);
474:   cum  = 0;
475:   for (i=0;i<ne;i++) {
476:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
477:     if (!PetscBTLookup(btee,i)) {
478:       marks[cum++] = i;
479:       continue;
480:     }
481:     /* set badly connected edge dofs as primal */
482:     if (!conforming) {
483:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
484:         marks[cum++] = i;
485:         PetscBTSet(bte,i);
486:         for (j=ii[i];j<ii[i+1];j++) {
487:           PetscBTSet(btv,jj[j]);
488:         }
489:       } else {
490:         /* every edge dofs should be connected trough a certain number of nodal dofs
491:            to other edge dofs belonging to coarse edges
492:            - at most 2 endpoints
493:            - order-1 interior nodal dofs
494:            - no undefined nodal dofs (nconn < order)
495:         */
496:         PetscInt ends = 0,ints = 0, undef = 0;
497:         for (j=ii[i];j<ii[i+1];j++) {
498:           PetscInt v = jj[j],k;
499:           PetscInt nconn = iit[v+1]-iit[v];
500:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
501:           if (nconn > order) ends++;
502:           else if (nconn == order) ints++;
503:           else undef++;
504:         }
505:         if (undef || ends > 2 || ints != order -1) {
506:           marks[cum++] = i;
507:           PetscBTSet(bte,i);
508:           for (j=ii[i];j<ii[i+1];j++) {
509:             PetscBTSet(btv,jj[j]);
510:           }
511:         }
512:       }
513:     }
514:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
515:     if (!order && ii[i+1] != ii[i]) {
516:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
517:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
518:     }
519:   }
520:   PetscBTDestroy(&btee);
521:   MatSeqAIJRestoreArray(lGe,&vals);
522:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
523:   if (!conforming) {
524:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
525:     MatDestroy(&lGt);
526:   }
527:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

529:   /* identify splitpoints and corner candidates */
530:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
531:   if (print) {
532:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
533:     MatView(lGe,NULL);
534:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
535:     MatView(lGt,NULL);
536:   }
537:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
538:   MatSeqAIJGetArray(lGt,&vals);
539:   for (i=0;i<nv;i++) {
540:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
541:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
542:     if (!order) { /* variable order */
543:       PetscReal vorder = 0.;

545:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
546:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
547:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
548:       ord  = 1;
549:     }
550: #if defined(PETSC_USE_DEBUG)
551:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
552: #endif
553:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
554:       if (PetscBTLookup(btbd,jj[j])) {
555:         bdir = PETSC_TRUE;
556:         break;
557:       }
558:       if (vc != ecount[jj[j]]) {
559:         sneighs = PETSC_FALSE;
560:       } else {
561:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
562:         for (k=0;k<vc;k++) {
563:           if (vn[k] != en[k]) {
564:             sneighs = PETSC_FALSE;
565:             break;
566:           }
567:         }
568:       }
569:     }
570:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
571:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
572:       PetscBTSet(btv,i);
573:     } else if (test == ord) {
574:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
575:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
576:         PetscBTSet(btv,i);
577:       } else {
578:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
579:         PetscBTSet(btvcand,i);
580:       }
581:     }
582:   }
583:   ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
584:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
585:   PetscBTDestroy(&btbd);

587:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
588:   if (order != 1) {
589:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
590:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
591:     for (i=0;i<nv;i++) {
592:       if (PetscBTLookup(btvcand,i)) {
593:         PetscBool found = PETSC_FALSE;
594:         for (j=ii[i];j<ii[i+1] && !found;j++) {
595:           PetscInt k,e = jj[j];
596:           if (PetscBTLookup(bte,e)) continue;
597:           for (k=iit[e];k<iit[e+1];k++) {
598:             PetscInt v = jjt[k];
599:             if (v != i && PetscBTLookup(btvcand,v)) {
600:               found = PETSC_TRUE;
601:               break;
602:             }
603:           }
604:         }
605:         if (!found) {
606:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
607:           PetscBTClear(btvcand,i);
608:         } else {
609:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
610:         }
611:       }
612:     }
613:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
614:   }
615:   MatSeqAIJRestoreArray(lGt,&vals);
616:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
617:   MatDestroy(&lGe);

619:   /* Get the local G^T explicitly */
620:   MatDestroy(&lGt);
621:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
622:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

624:   /* Mark interior nodal dofs */
625:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
626:   PetscBTCreate(nv,&btvi);
627:   for (i=1;i<n_neigh;i++) {
628:     for (j=0;j<n_shared[i];j++) {
629:       PetscBTSet(btvi,shared[i][j]);
630:     }
631:   }
632:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

634:   /* communicate corners and splitpoints */
635:   PetscMalloc1(nv,&vmarks);
636:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
637:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
638:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

640:   if (print) {
641:     IS tbz;

643:     cum = 0;
644:     for (i=0;i<nv;i++)
645:       if (sfvleaves[i])
646:         vmarks[cum++] = i;

648:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
649:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
650:     ISView(tbz,NULL);
651:     ISDestroy(&tbz);
652:   }

654:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
655:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
657:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

659:   /* Zero rows of lGt corresponding to identified corners
660:      and interior nodal dofs */
661:   cum = 0;
662:   for (i=0;i<nv;i++) {
663:     if (sfvleaves[i]) {
664:       vmarks[cum++] = i;
665:       PetscBTSet(btv,i);
666:     }
667:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
668:   }
669:   PetscBTDestroy(&btvi);
670:   if (print) {
671:     IS tbz;

673:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
674:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
675:     ISView(tbz,NULL);
676:     ISDestroy(&tbz);
677:   }
678:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
679:   PetscFree(vmarks);
680:   PetscSFDestroy(&sfv);
681:   PetscFree2(sfvleaves,sfvroots);

683:   /* Recompute G */
684:   MatDestroy(&lG);
685:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
686:   if (print) {
687:     PetscObjectSetName((PetscObject)lG,"used_lG");
688:     MatView(lG,NULL);
689:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
690:     MatView(lGt,NULL);
691:   }

693:   /* Get primal dofs (if any) */
694:   cum = 0;
695:   for (i=0;i<ne;i++) {
696:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
697:   }
698:   if (fl2g) {
699:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
700:   }
701:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
702:   if (print) {
703:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
704:     ISView(primals,NULL);
705:   }
706:   PetscBTDestroy(&bte);
707:   /* TODO: what if the user passed in some of them ?  */
708:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
709:   ISDestroy(&primals);

711:   /* Compute edge connectivity */
712:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
713:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
714:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
715:   if (fl2g) {
716:     PetscBT   btf;
717:     PetscInt  *iia,*jja,*iiu,*jju;
718:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

720:     /* create CSR for all local dofs */
721:     PetscMalloc1(n+1,&iia);
722:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
723:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
724:       iiu = pcbddc->mat_graph->xadj;
725:       jju = pcbddc->mat_graph->adjncy;
726:     } else if (pcbddc->use_local_adj) {
727:       rest = PETSC_TRUE;
728:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
729:     } else {
730:       free   = PETSC_TRUE;
731:       PetscMalloc2(n+1,&iiu,n,&jju);
732:       iiu[0] = 0;
733:       for (i=0;i<n;i++) {
734:         iiu[i+1] = i+1;
735:         jju[i]   = -1;
736:       }
737:     }

739:     /* import sizes of CSR */
740:     iia[0] = 0;
741:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

743:     /* overwrite entries corresponding to the Nedelec field */
744:     PetscBTCreate(n,&btf);
745:     ISGetIndices(nedfieldlocal,&idxs);
746:     for (i=0;i<ne;i++) {
747:       PetscBTSet(btf,idxs[i]);
748:       iia[idxs[i]+1] = ii[i+1]-ii[i];
749:     }

751:     /* iia in CSR */
752:     for (i=0;i<n;i++) iia[i+1] += iia[i];

754:     /* jja in CSR */
755:     PetscMalloc1(iia[n],&jja);
756:     for (i=0;i<n;i++)
757:       if (!PetscBTLookup(btf,i))
758:         for (j=0;j<iiu[i+1]-iiu[i];j++)
759:           jja[iia[i]+j] = jju[iiu[i]+j];

761:     /* map edge dofs connectivity */
762:     if (jj) {
763:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
764:       for (i=0;i<ne;i++) {
765:         PetscInt e = idxs[i];
766:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
767:       }
768:     }
769:     ISRestoreIndices(nedfieldlocal,&idxs);
770:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
771:     if (rest) {
772:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
773:     }
774:     if (free) {
775:       PetscFree2(iiu,jju);
776:     }
777:     PetscBTDestroy(&btf);
778:   } else {
779:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
780:   }

782:   /* Analyze interface for edge dofs */
783:   PCBDDCAnalyzeInterface(pc);
784:   pcbddc->mat_graph->twodim = PETSC_FALSE;

786:   /* Get coarse edges in the edge space */
787:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
788:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

790:   if (fl2g) {
791:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
792:     PetscMalloc1(nee,&eedges);
793:     for (i=0;i<nee;i++) {
794:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
795:     }
796:   } else {
797:     eedges  = alleedges;
798:     primals = allprimals;
799:   }

801:   /* Mark fine edge dofs with their coarse edge id */
802:   PetscMemzero(marks,ne*sizeof(PetscInt));
803:   ISGetLocalSize(primals,&cum);
804:   ISGetIndices(primals,&idxs);
805:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
806:   ISRestoreIndices(primals,&idxs);
807:   if (print) {
808:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
809:     ISView(primals,NULL);
810:   }

812:   maxsize = 0;
813:   for (i=0;i<nee;i++) {
814:     PetscInt size,mark = i+1;

816:     ISGetLocalSize(eedges[i],&size);
817:     ISGetIndices(eedges[i],&idxs);
818:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
819:     ISRestoreIndices(eedges[i],&idxs);
820:     maxsize = PetscMax(maxsize,size);
821:   }

823:   /* Find coarse edge endpoints */
824:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
825:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
826:   for (i=0;i<nee;i++) {
827:     PetscInt mark = i+1,size;

829:     ISGetLocalSize(eedges[i],&size);
830:     if (!size && nedfieldlocal) continue;
831:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
832:     ISGetIndices(eedges[i],&idxs);
833:     if (print) {
834:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
835:       ISView(eedges[i],NULL);
836:     }
837:     for (j=0;j<size;j++) {
838:       PetscInt k, ee = idxs[j];
839:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
840:       for (k=ii[ee];k<ii[ee+1];k++) {
841:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
842:         if (PetscBTLookup(btv,jj[k])) {
843:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
844:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
845:           PetscInt  k2;
846:           PetscBool corner = PETSC_FALSE;
847:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
848:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
849:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
850:                if the edge dof lie on the natural part of the boundary */
851:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
852:               corner = PETSC_TRUE;
853:               break;
854:             }
855:           }
856:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
857:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
858:             PetscBTSet(btv,jj[k]);
859:           } else {
860:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
861:           }
862:         }
863:       }
864:     }
865:     ISRestoreIndices(eedges[i],&idxs);
866:   }
867:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
868:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
869:   PetscBTDestroy(&btb);

871:   /* Reset marked primal dofs */
872:   ISGetLocalSize(primals,&cum);
873:   ISGetIndices(primals,&idxs);
874:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
875:   ISRestoreIndices(primals,&idxs);

877:   /* Now use the initial lG */
878:   MatDestroy(&lG);
879:   MatDestroy(&lGt);
880:   lG   = lGinit;
881:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

883:   /* Compute extended cols indices */
884:   PetscBTCreate(nv,&btvc);
885:   PetscBTCreate(nee,&bter);
886:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
887:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
888:   i   *= maxsize;
889:   PetscCalloc1(nee,&extcols);
890:   PetscMalloc2(i,&extrow,i,&gidxs);
891:   eerr = PETSC_FALSE;
892:   for (i=0;i<nee;i++) {
893:     PetscInt size,found = 0;

895:     cum  = 0;
896:     ISGetLocalSize(eedges[i],&size);
897:     if (!size && nedfieldlocal) continue;
898:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
899:     ISGetIndices(eedges[i],&idxs);
900:     PetscBTMemzero(nv,btvc);
901:     for (j=0;j<size;j++) {
902:       PetscInt k,ee = idxs[j];
903:       for (k=ii[ee];k<ii[ee+1];k++) {
904:         PetscInt vv = jj[k];
905:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
906:         else if (!PetscBTLookupSet(btvc,vv)) found++;
907:       }
908:     }
909:     ISRestoreIndices(eedges[i],&idxs);
910:     PetscSortRemoveDupsInt(&cum,extrow);
911:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
912:     PetscSortIntWithArray(cum,gidxs,extrow);
913:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
914:     /* it may happen that endpoints are not defined at this point
915:        if it is the case, mark this edge for a second pass */
916:     if (cum != size -1 || found != 2) {
917:       PetscBTSet(bter,i);
918:       if (print) {
919:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
920:         ISView(eedges[i],NULL);
921:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
922:         ISView(extcols[i],NULL);
923:       }
924:       eerr = PETSC_TRUE;
925:     }
926:   }
927:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
928:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
929:   if (done) {
930:     PetscInt *newprimals;

932:     PetscMalloc1(ne,&newprimals);
933:     ISGetLocalSize(primals,&cum);
934:     ISGetIndices(primals,&idxs);
935:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
936:     ISRestoreIndices(primals,&idxs);
937:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
938:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
939:     for (i=0;i<nee;i++) {
940:       PetscBool has_candidates = PETSC_FALSE;
941:       if (PetscBTLookup(bter,i)) {
942:         PetscInt size,mark = i+1;

944:         ISGetLocalSize(eedges[i],&size);
945:         ISGetIndices(eedges[i],&idxs);
946:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
947:         for (j=0;j<size;j++) {
948:           PetscInt k,ee = idxs[j];
949:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
950:           for (k=ii[ee];k<ii[ee+1];k++) {
951:             /* set all candidates located on the edge as corners */
952:             if (PetscBTLookup(btvcand,jj[k])) {
953:               PetscInt k2,vv = jj[k];
954:               has_candidates = PETSC_TRUE;
955:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
956:               PetscBTSet(btv,vv);
957:               /* set all edge dofs connected to candidate as primals */
958:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
959:                 if (marks[jjt[k2]] == mark) {
960:                   PetscInt k3,ee2 = jjt[k2];
961:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
962:                   newprimals[cum++] = ee2;
963:                   /* finally set the new corners */
964:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
965:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
966:                     PetscBTSet(btv,jj[k3]);
967:                   }
968:                 }
969:               }
970:             } else {
971:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
972:             }
973:           }
974:         }
975:         if (!has_candidates) { /* circular edge */
976:           PetscInt k, ee = idxs[0],*tmarks;

978:           PetscCalloc1(ne,&tmarks);
979:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
980:           for (k=ii[ee];k<ii[ee+1];k++) {
981:             PetscInt k2;
982:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
983:             PetscBTSet(btv,jj[k]);
984:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
985:           }
986:           for (j=0;j<size;j++) {
987:             if (tmarks[idxs[j]] > 1) {
988:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
989:               newprimals[cum++] = idxs[j];
990:             }
991:           }
992:           PetscFree(tmarks);
993:         }
994:         ISRestoreIndices(eedges[i],&idxs);
995:       }
996:       ISDestroy(&extcols[i]);
997:     }
998:     PetscFree(extcols);
999:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1000:     PetscSortRemoveDupsInt(&cum,newprimals);
1001:     if (fl2g) {
1002:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1003:       ISDestroy(&primals);
1004:       for (i=0;i<nee;i++) {
1005:         ISDestroy(&eedges[i]);
1006:       }
1007:       PetscFree(eedges);
1008:     }
1009:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1010:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1011:     PetscFree(newprimals);
1012:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1013:     ISDestroy(&primals);
1014:     PCBDDCAnalyzeInterface(pc);
1015:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1016:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1017:     if (fl2g) {
1018:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1019:       PetscMalloc1(nee,&eedges);
1020:       for (i=0;i<nee;i++) {
1021:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1022:       }
1023:     } else {
1024:       eedges  = alleedges;
1025:       primals = allprimals;
1026:     }
1027:     PetscCalloc1(nee,&extcols);

1029:     /* Mark again */
1030:     PetscMemzero(marks,ne*sizeof(PetscInt));
1031:     for (i=0;i<nee;i++) {
1032:       PetscInt size,mark = i+1;

1034:       ISGetLocalSize(eedges[i],&size);
1035:       ISGetIndices(eedges[i],&idxs);
1036:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1037:       ISRestoreIndices(eedges[i],&idxs);
1038:     }
1039:     if (print) {
1040:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1041:       ISView(primals,NULL);
1042:     }

1044:     /* Recompute extended cols */
1045:     eerr = PETSC_FALSE;
1046:     for (i=0;i<nee;i++) {
1047:       PetscInt size;

1049:       cum  = 0;
1050:       ISGetLocalSize(eedges[i],&size);
1051:       if (!size && nedfieldlocal) continue;
1052:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1053:       ISGetIndices(eedges[i],&idxs);
1054:       for (j=0;j<size;j++) {
1055:         PetscInt k,ee = idxs[j];
1056:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1057:       }
1058:       ISRestoreIndices(eedges[i],&idxs);
1059:       PetscSortRemoveDupsInt(&cum,extrow);
1060:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1061:       PetscSortIntWithArray(cum,gidxs,extrow);
1062:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1063:       if (cum != size -1) {
1064:         if (print) {
1065:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1066:           ISView(eedges[i],NULL);
1067:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1068:           ISView(extcols[i],NULL);
1069:         }
1070:         eerr = PETSC_TRUE;
1071:       }
1072:     }
1073:   }
1074:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1075:   PetscFree2(extrow,gidxs);
1076:   PetscBTDestroy(&bter);
1077:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1078:   /* an error should not occur at this point */
1079:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1081:   /* Check the number of endpoints */
1082:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1083:   PetscMalloc1(2*nee,&corners);
1084:   PetscMalloc1(nee,&cedges);
1085:   for (i=0;i<nee;i++) {
1086:     PetscInt size, found = 0, gc[2];

1088:     /* init with defaults */
1089:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1090:     ISGetLocalSize(eedges[i],&size);
1091:     if (!size && nedfieldlocal) continue;
1092:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1093:     ISGetIndices(eedges[i],&idxs);
1094:     PetscBTMemzero(nv,btvc);
1095:     for (j=0;j<size;j++) {
1096:       PetscInt k,ee = idxs[j];
1097:       for (k=ii[ee];k<ii[ee+1];k++) {
1098:         PetscInt vv = jj[k];
1099:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1100:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1101:           corners[i*2+found++] = vv;
1102:         }
1103:       }
1104:     }
1105:     if (found != 2) {
1106:       PetscInt e;
1107:       if (fl2g) {
1108:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1109:       } else {
1110:         e = idxs[0];
1111:       }
1112:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1113:     }

1115:     /* get primal dof index on this coarse edge */
1116:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1117:     if (gc[0] > gc[1]) {
1118:       PetscInt swap  = corners[2*i];
1119:       corners[2*i]   = corners[2*i+1];
1120:       corners[2*i+1] = swap;
1121:     }
1122:     cedges[i] = idxs[size-1];
1123:     ISRestoreIndices(eedges[i],&idxs);
1124:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1125:   }
1126:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1127:   PetscBTDestroy(&btvc);

1129: #if defined(PETSC_USE_DEBUG)
1130:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1131:      not interfere with neighbouring coarse edges */
1132:   PetscMalloc1(nee+1,&emarks);
1133:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1134:   for (i=0;i<nv;i++) {
1135:     PetscInt emax = 0,eemax = 0;

1137:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1138:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1139:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1140:     for (j=1;j<nee+1;j++) {
1141:       if (emax < emarks[j]) {
1142:         emax = emarks[j];
1143:         eemax = j;
1144:       }
1145:     }
1146:     /* not relevant for edges */
1147:     if (!eemax) continue;

1149:     for (j=ii[i];j<ii[i+1];j++) {
1150:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1151:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1152:       }
1153:     }
1154:   }
1155:   PetscFree(emarks);
1156:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1157: #endif

1159:   /* Compute extended rows indices for edge blocks of the change of basis */
1160:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1161:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1162:   extmem *= maxsize;
1163:   PetscMalloc1(extmem*nee,&extrow);
1164:   PetscMalloc1(nee,&extrows);
1165:   PetscCalloc1(nee,&extrowcum);
1166:   for (i=0;i<nv;i++) {
1167:     PetscInt mark = 0,size,start;

1169:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1170:     for (j=ii[i];j<ii[i+1];j++)
1171:       if (marks[jj[j]] && !mark)
1172:         mark = marks[jj[j]];

1174:     /* not relevant */
1175:     if (!mark) continue;

1177:     /* import extended row */
1178:     mark--;
1179:     start = mark*extmem+extrowcum[mark];
1180:     size = ii[i+1]-ii[i];
1181:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1182:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1183:     extrowcum[mark] += size;
1184:   }
1185:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1186:   MatDestroy(&lGt);
1187:   PetscFree(marks);

1189:   /* Compress extrows */
1190:   cum  = 0;
1191:   for (i=0;i<nee;i++) {
1192:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1193:     PetscSortRemoveDupsInt(&size,start);
1194:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1195:     cum  = PetscMax(cum,size);
1196:   }
1197:   PetscFree(extrowcum);
1198:   PetscBTDestroy(&btv);
1199:   PetscBTDestroy(&btvcand);

1201:   /* Workspace for lapack inner calls and VecSetValues */
1202:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1204:   /* Create change of basis matrix (preallocation can be improved) */
1205:   MatCreate(comm,&T);
1206:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1207:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1208:   MatSetType(T,MATAIJ);
1209:   MatSeqAIJSetPreallocation(T,10,NULL);
1210:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1211:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1212:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1213:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1214:   ISLocalToGlobalMappingDestroy(&al2g);

1216:   /* Defaults to identity */
1217:   MatCreateVecs(pc->pmat,&tvec,NULL);
1218:   VecSet(tvec,1.0);
1219:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1220:   VecDestroy(&tvec);

1222:   /* Create discrete gradient for the coarser level if needed */
1223:   MatDestroy(&pcbddc->nedcG);
1224:   ISDestroy(&pcbddc->nedclocal);
1225:   if (pcbddc->current_level < pcbddc->max_levels) {
1226:     ISLocalToGlobalMapping cel2g,cvl2g;
1227:     IS                     wis,gwis;
1228:     PetscInt               cnv,cne;

1230:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1231:     if (fl2g) {
1232:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1233:     } else {
1234:       PetscObjectReference((PetscObject)wis);
1235:       pcbddc->nedclocal = wis;
1236:     }
1237:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1238:     ISDestroy(&wis);
1239:     ISRenumber(gwis,NULL,&cne,&wis);
1240:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1241:     ISDestroy(&wis);
1242:     ISDestroy(&gwis);

1244:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1245:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1246:     ISDestroy(&wis);
1247:     ISRenumber(gwis,NULL,&cnv,&wis);
1248:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1249:     ISDestroy(&wis);
1250:     ISDestroy(&gwis);

1252:     MatCreate(comm,&pcbddc->nedcG);
1253:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1254:     MatSetType(pcbddc->nedcG,MATAIJ);
1255:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1256:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1257:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1258:     ISLocalToGlobalMappingDestroy(&cel2g);
1259:     ISLocalToGlobalMappingDestroy(&cvl2g);
1260:   }
1261:   ISLocalToGlobalMappingDestroy(&vl2g);

1263: #if defined(PRINT_GDET)
1264:   inc = 0;
1265:   lev = pcbddc->current_level;
1266: #endif

1268:   /* Insert values in the change of basis matrix */
1269:   for (i=0;i<nee;i++) {
1270:     Mat         Gins = NULL, GKins = NULL;
1271:     IS          cornersis = NULL;
1272:     PetscScalar cvals[2];

1274:     if (pcbddc->nedcG) {
1275:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1276:     }
1277:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1278:     if (Gins && GKins) {
1279:       PetscScalar    *data;
1280:       const PetscInt *rows,*cols;
1281:       PetscInt       nrh,nch,nrc,ncc;

1283:       ISGetIndices(eedges[i],&cols);
1284:       /* H1 */
1285:       ISGetIndices(extrows[i],&rows);
1286:       MatGetSize(Gins,&nrh,&nch);
1287:       MatDenseGetArray(Gins,&data);
1288:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1289:       MatDenseRestoreArray(Gins,&data);
1290:       ISRestoreIndices(extrows[i],&rows);
1291:       /* complement */
1292:       MatGetSize(GKins,&nrc,&ncc);
1293:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1294:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1295:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1296:       MatDenseGetArray(GKins,&data);
1297:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1298:       MatDenseRestoreArray(GKins,&data);

1300:       /* coarse discrete gradient */
1301:       if (pcbddc->nedcG) {
1302:         PetscInt cols[2];

1304:         cols[0] = 2*i;
1305:         cols[1] = 2*i+1;
1306:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1307:       }
1308:       ISRestoreIndices(eedges[i],&cols);
1309:     }
1310:     ISDestroy(&extrows[i]);
1311:     ISDestroy(&extcols[i]);
1312:     ISDestroy(&cornersis);
1313:     MatDestroy(&Gins);
1314:     MatDestroy(&GKins);
1315:   }
1316:   ISLocalToGlobalMappingDestroy(&el2g);

1318:   /* Start assembling */
1319:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1320:   if (pcbddc->nedcG) {
1321:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1322:   }

1324:   /* Free */
1325:   if (fl2g) {
1326:     ISDestroy(&primals);
1327:     for (i=0;i<nee;i++) {
1328:       ISDestroy(&eedges[i]);
1329:     }
1330:     PetscFree(eedges);
1331:   }

1333:   /* hack mat_graph with primal dofs on the coarse edges */
1334:   {
1335:     PCBDDCGraph graph   = pcbddc->mat_graph;
1336:     PetscInt    *oqueue = graph->queue;
1337:     PetscInt    *ocptr  = graph->cptr;
1338:     PetscInt    ncc,*idxs;

1340:     /* find first primal edge */
1341:     if (pcbddc->nedclocal) {
1342:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1343:     } else {
1344:       if (fl2g) {
1345:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1346:       }
1347:       idxs = cedges;
1348:     }
1349:     cum = 0;
1350:     while (cum < nee && cedges[cum] < 0) cum++;

1352:     /* adapt connected components */
1353:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1354:     graph->cptr[0] = 0;
1355:     for (i=0,ncc=0;i<graph->ncc;i++) {
1356:       PetscInt lc = ocptr[i+1]-ocptr[i];
1357:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1358:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1359:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1360:         ncc++;
1361:         lc--;
1362:         cum++;
1363:         while (cum < nee && cedges[cum] < 0) cum++;
1364:       }
1365:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1366:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1367:       ncc++;
1368:     }
1369:     graph->ncc = ncc;
1370:     if (pcbddc->nedclocal) {
1371:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1372:     }
1373:     PetscFree2(ocptr,oqueue);
1374:   }
1375:   ISLocalToGlobalMappingDestroy(&fl2g);
1376:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1377:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1378:   MatDestroy(&conn);

1380:   ISDestroy(&nedfieldlocal);
1381:   PetscFree(extrow);
1382:   PetscFree2(work,rwork);
1383:   PetscFree(corners);
1384:   PetscFree(cedges);
1385:   PetscFree(extrows);
1386:   PetscFree(extcols);
1387:   MatDestroy(&lG);

1389:   /* Complete assembling */
1390:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1391:   if (pcbddc->nedcG) {
1392:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1393: #if 0
1394:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1395:     MatView(pcbddc->nedcG,NULL);
1396: #endif
1397:   }

1399:   /* set change of basis */
1400:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1401:   MatDestroy(&T);

1403:   return(0);
1404: }

1406: /* the near-null space of BDDC carries information on quadrature weights,
1407:    and these can be collinear -> so cheat with MatNullSpaceCreate
1408:    and create a suitable set of basis vectors first */
/* PCBDDCNullSpaceCreate - wraps MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp).

   Before the call, placeholder entries are written into each quad_vecs[i] by the
   process whose ownership range [first,last) contains the index i:
     - has_const false: 1 at local offset i-first;
     - has_const true : 1/sqrt(2) and -1/sqrt(2) at local offsets 2*i-first and 2*i-first+1.
   After the call the same entries are set to zero (not restored to earlier values).

   Input:  comm, has_const, nvecs, quad_vecs - forwarded unchanged to MatNullSpaceCreate
   Output: nnsp - filled by MatNullSpaceCreate
   Errors: PETSC_ERR_SUP ("Not implemented") when has_const is true and a process owns
           fewer than 2*nvecs entries of a vector.
   On exit every quad_vecs[i] has had one VecLockReadPop and one VecLockReadPush applied.
   NOTE(review): no return value of the called routines is checked in the code as shown. */
1409: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1410: {
1412:   PetscInt       i;

        /* first pass: write the placeholder entries */
1415:   for (i=0;i<nvecs;i++) {
1416:     PetscInt first,last;

1418:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
          /* the has_const layout uses offsets up to 2*i-first+1, so a local size of at least 2*nvecs is required */
1419:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* only the process owning index i touches vector i */
1420:     if (i>=first && i < last) {
1421:       PetscScalar *data;
1422:       VecGetArray(quad_vecs[i],&data);
1423:       if (!has_const) {
1424:         data[i-first] = 1.;
1425:       } else {
              /* two entries of opposite sign and equal magnitude */
1426:         data[2*i-first] = 1./PetscSqrtReal(2.);
1427:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1428:       }
1429:       VecRestoreArray(quad_vecs[i],&data);
1430:     }
          /* called on every process, whether or not it wrote into the vector */
1431:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1432:   }
1433:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1434:   for (i=0;i<nvecs;i++) { /* reset vectors */
1435:     PetscInt first,last;
          /* NOTE(review): presumably MatNullSpaceCreate leaves the vectors read-locked, hence the pop
             before writing - confirm against the MatNullSpaceCreate documentation */
1436:     VecLockReadPop(quad_vecs[i]);
1437:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1438:     if (i>=first && i < last) {
1439:       PetscScalar *data;
1440:       VecGetArray(quad_vecs[i],&data);
            /* zero exactly the offsets written in the first pass */
1441:       if (!has_const) {
1442:         data[i-first] = 0.;
1443:       } else {
1444:         data[2*i-first] = 0.;
1445:         data[2*i-first+1] = 0.;
1446:       }
1447:       VecRestoreArray(quad_vecs[i],&data);
1448:     }
1449:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
          /* re-apply the lock popped at the top of this iteration */
1450:     VecLockReadPush(quad_vecs[i]);
1451:   }
1452:   return(0);
1453: }

/* PCBDDCComputeNoNetFlux - creates *nnsp from a set of vectors filled with entries of the
   product of the local part of divudotp with a vector of ones.

   A         - used to create the work vectors, to obtain the local-to-global map, to supply the
               communicator, and (when vl2l is given) its MATIS local matrix sizes vins
   divudotp  - its local matrix (MatISGetLocalMat) is applied to a vector of ones
   transpose - PETSC_FALSE: MatMultTranspose and the first map returned by MatGetLocalToGlobalMapping;
               PETSC_TRUE : MatMult and the second map
   vl2l      - optional IS; when non-NULL the product is scattered into vins at these positions
   graph     - supplies l2gmap (neighbour and shared-index info), count and neighbours_set
   nnsp      - output; set to NULL when the maximum of n_neigh over all processes is zero

   NOTE(review): ierr is declared but unused in the code as shown; no return value is checked. */
1455: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1456: {
1457:   Mat                    loc_divudotp;
1458:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1459:   ISLocalToGlobalMapping map;
1460:   PetscScalar            *vals;
1461:   const PetscScalar      *array;
1462:   PetscInt               i,maxneighs,maxsize,*gidxs;
1463:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1464:   PetscMPIInt            rank;
1465:   PetscErrorCode         ierr;

        /* neighbour information; maxneighs is the largest n_neigh over A's communicator */
1468:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1469:   MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
        /* nothing is shared anywhere: return an empty result */
1470:   if (!maxneighs) {
1471:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1472:     *nnsp = NULL;
1473:     return(0);
1474:   }
        /* work arrays sized for the largest shared set */
1475:   maxsize = 0;
1476:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1477:   PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1478:   /* create vectors to hold quadrature weights */
1479:   MatCreateVecs(A,&quad_vec,NULL);
1480:   if (!transpose) {
1481:     MatGetLocalToGlobalMapping(A,&map,NULL);
1482:   } else {
1483:     MatGetLocalToGlobalMapping(A,NULL,&map);
1484:   }
        /* maxneighs vectors laid out like quad_vec; the template itself is no longer needed */
1485:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1486:   VecDestroy(&quad_vec);
        /* the null space object is created first (has_const = PETSC_FALSE); the vectors are filled afterwards */
1487:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* PCBDDCNullSpaceCreate ends with VecLockReadPush on each vector; pop it before writing below */
1488:   for (i=0;i<maxneighs;i++) {
1489:     VecLockReadPop(quad_vecs[i]);
1490:   }

1492:   /* compute local quad vec */
1493:   MatISGetLocalMat(divudotp,&loc_divudotp);
        /* p is the input of the product and v its output; their roles swap with transpose */
1494:   if (!transpose) {
1495:     MatCreateVecs(loc_divudotp,&v,&p);
1496:   } else {
1497:     MatCreateVecs(loc_divudotp,&p,&v);
1498:   }
1499:   VecSet(p,1.);
1500:   if (!transpose) {
1501:     MatMultTranspose(loc_divudotp,p,v);
1502:   } else {
1503:     MatMult(loc_divudotp,p,v);
1504:   }
1505:   if (vl2l) {
1506:     Mat        lA;
1507:     VecScatter sc;

          /* scatter v into a vector created from A's local matrix, at the positions listed in vl2l
             (NULL source IS: presumably all entries of v - confirm against VecScatterCreate) */
1509:     MatISGetLocalMat(A,&lA);
1510:     MatCreateVecs(lA,&vins,NULL);
1511:     VecScatterCreate(v,NULL,vins,vl2l,&sc);
1512:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1513:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1514:     VecScatterDestroy(&sc);
1515:   } else {
          /* no remapping requested: vins aliases v (and is therefore not destroyed separately below) */
1516:     vins = v;
1517:   }
1518:   VecGetArrayRead(vins,&array);
1519:   VecDestroy(&p);

1521:   /* insert in global quadrature vecs */
1522:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1523:   for (i=0;i<n_neigh;i++) {
1524:     const PetscInt    *idxs;
1525:     PetscInt          idx,nn,j;

1527:     idxs = shared[i];
1528:     nn   = n_shared[i];
          /* gather the local values at the indices shared with neighbour entry i */
1529:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* choose the destination vector from the search result for this rank in the neighbour set of
             the first shared index; presumably rank is absent from that set, so PetscFindInt returns a
             negative value encoding the insertion slot, which the next line converts back - confirm.
             NOTE(review): idxs[0] is read even when nn is 0, and idx is not range-checked against
             maxneighs before it indexes quad_vecs. */
1530:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1531:     idx  = -(idx+1);
1532:     ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1533:     VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1534:   }
1535:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1536:   VecRestoreArrayRead(vins,&array);
1537:   if (vl2l) {
1538:     VecDestroy(&vins);
1539:   }
1540:   VecDestroy(&v);
1541:   PetscFree2(gidxs,vals);

1543:   /* assemble near null space */
        /* all assemblies are started before any is completed */
1544:   for (i=0;i<maxneighs;i++) {
1545:     VecAssemblyBegin(quad_vecs[i]);
1546:   }
1547:   for (i=0;i<maxneighs;i++) {
1548:     VecAssemblyEnd(quad_vecs[i]);
1549:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
          /* re-apply the read lock popped right after PCBDDCNullSpaceCreate */
1550:     VecLockReadPush(quad_vecs[i]);
1551:   }
        /* drop this function's handle on the vectors; presumably *nnsp keeps its own references - confirm */
1552:   VecDestroyVecs(maxneighs,&quad_vecs);
1553:   return(0);
1554: }

/* PCBDDCAddPrimalVerticesLocalIS - adds the indices in primalv to pcbddc->user_primal_vertices_local.

   pc      - its data pointer is used as the PC_BDDC context
   primalv - indices to add; when NULL the function does nothing

   If a list is already stored, it is replaced by the concatenation of primalv and the stored list,
   passed through ISSortRemoveDups; otherwise primalv is handed to PCBDDCSetPrimalVerticesLocalIS.
   Returns 0 in the code as shown. */
1556: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1557: {
1558:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1562:   if (primalv) {
1563:     if (pcbddc->user_primal_vertices_local) {
1564:       IS list[2], newp;

            /* merge: new indices first, then the ones already stored */
1566:       list[0] = primalv;
1567:       list[1] = pcbddc->user_primal_vertices_local;
1568:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1569:       ISSortRemoveDups(newp);
            /* destroy the previously stored IS through its alias list[1], then store the merged one */
1570:       ISDestroy(&list[1]);
1571:       pcbddc->user_primal_vertices_local = newp;
1572:     } else {
            /* nothing stored yet: delegate to the setter */
1573:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1574:     }
1575:   }
1576:   return(0);
1577: }

/* func_coords_private - writes the coordinate X[*comp] into out[0..Nf-1].

   dim, t - not used in the body
   X      - coordinate array; only entry *comp is read
   Nf     - number of entries of out to fill
   out    - output array, receives Nf copies of X[*comp]
   ctx    - interpreted as a pointer to a PetscInt holding the coordinate index to read
   Returns 0. */
1579: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1580: {
1581:   PetscInt f, *comp  = (PetscInt *)ctx;

        /* X[*comp] is read only inside the loop, so nothing is dereferenced when Nf <= 0 */
1584:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1585:   return(0);
1586: }

/*
   PCBDDCComputeLocalTopologyInfo - Gathers the topology information used by BDDC in
   subdomain-local ordering.

   In order, the routine:
     - reads the -pc_bddc_monolithic option;
     - builds the local dof splitting (pcbddc->ISForDofsLocal), unless monolithic;
     - converts Dirichlet/Neumann boundaries and user primal vertices from global to
       local ordering, or runs the consistency check on already-local boundary sets;
     - optionally detects disconnected components of the local problem;
     - when a DMDA is attached, stores dof coordinates in pcbddc->mat_graph and adds
       the DMDA subdomain corners as primal vertices;
     - when corner selection is on and no coordinate dimension has been stored,
       computes dof coordinates through DMProjectFunction and passes them to
       PCSetCoordinates.

   Input Parameter:
.  pc - the preconditioner; pc->pmat->data is accessed as a Mat_IS

   NOTE(review): in this listing the return codes of the PETSc calls are not checked.
*/
1588: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1589: {
1591:   Vec            local,global;
1592:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1593:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1594:   PetscBool      monolithic = PETSC_FALSE;

1597:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1598:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1599:   PetscOptionsEnd();
1600:   /* need to convert from global to local topology information and remove references to information in global ordering */
1601:   MatCreateVecs(pc->pmat,&global,NULL);
1602:   MatCreateVecs(matis->A,&local,NULL);
1603:   if (monolithic) { /* just get block size to properly compute vertices */
1604:     if (pcbddc->vertex_size == 1) {
1605:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1606:     }
1607:     goto boundary;
1608:   }

        /* dof splitting: convert the user-provided global ISs, or, when no local splitting
           exists, try in turn the DM field decomposition, the fields attached by the
           MatNest conversion, and finally the matrix block size */
1610:   if (pcbddc->user_provided_isfordofs) {
1611:     if (pcbddc->n_ISForDofs) {
1612:       PetscInt i;

1614:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1615:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1616:         PetscInt bs;

1618:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1619:         ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1620:         ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
                /* the global-ordering IS is dropped as soon as its local counterpart exists */
1621:         ISDestroy(&pcbddc->ISForDofs[i]);
1622:       }
1623:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1624:       pcbddc->n_ISForDofs = 0;
1625:       PetscFree(pcbddc->ISForDofs);
1626:     }
1627:   } else {
1628:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1629:       DM dm;

1631:       MatGetDM(pc->pmat, &dm);
1632:       if (!dm) {
1633:         PCGetDM(pc, &dm);
1634:       }
1635:       if (dm) {
1636:         IS      *fields;
1637:         PetscInt nf,i;

1639:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1640:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1641:         for (i=0;i<nf;i++) {
1642:           PetscInt bs;

1644:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1645:           ISGetBlockSize(fields[i],&bs);
1646:           ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1647:           ISDestroy(&fields[i]);
1648:         }
1649:         PetscFree(fields);
1650:         pcbddc->n_ISForDofsLocal = nf;
1651:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1652:         PetscContainer   c;

1654:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1655:         if (c) {
1656:           MatISLocalFields lf;
1657:           PetscContainerGetPointer(c,(void**)&lf);
1658:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1659:         } else { /* fallback, create the default fields if bs > 1 */
                  /* i first holds the block size of pc->pmat, then is reused as the field index */
1660:           PetscInt i, n = matis->A->rmap->n;
1661:           MatGetBlockSize(pc->pmat,&i);
1662:           if (i > 1) {
1663:             pcbddc->n_ISForDofsLocal = i;
1664:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1665:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
                      /* field i: n/bs local dofs starting at i with stride bs */
1666:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1667:             }
1668:           }
1669:         }
1670:       }
1671:     } else {
1672:       PetscInt i;
1673:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1674:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1675:       }
1676:     }
1677:   }

        /* boundary data: convert from global ordering when only the global IS is set,
           otherwise check the existing local IS (MPI_LAND for Dirichlet, MPI_LOR for Neumann) */
1679: boundary:
1680:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1681:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1682:   } else if (pcbddc->DirichletBoundariesLocal) {
1683:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1684:   }
1685:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1686:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1687:   } else if (pcbddc->NeumannBoundariesLocal) {
1688:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1689:   }
1690:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1691:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1692:   }
1693:   VecDestroy(&global);
1694:   VecDestroy(&local);
1695:   /* detect local disconnected subdomains if requested (use matis->A) */
1696:   if (pcbddc->detect_disconnected) {
1697:     IS        primalv = NULL;
1698:     PetscInt  i;
1699:     PetscBool filter = pcbddc->detect_disconnected_filter;

          /* discard the components of a previous call before recomputing them */
1701:     for (i=0;i<pcbddc->n_local_subs;i++) {
1702:       ISDestroy(&pcbddc->local_subs[i]);
1703:     }
1704:     PetscFree(pcbddc->local_subs);
1705:     PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1706:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1707:     ISDestroy(&primalv);
1708:   }
1709:   /* early stage corner detection */
1710:   {
1711:     DM dm;

1713:     MatGetDM(pc->pmat,&dm);
1714:     if (!dm) {
1715:       PCGetDM(pc,&dm);
1716:     }
1717:     if (dm) {
1718:       PetscBool isda;

1720:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1721:       if (isda) {
1722:         ISLocalToGlobalMapping l2l;
1723:         IS                     corners;
1724:         Mat                    lA;
1725:         PetscBool              gl,lo;

                /* store the DMDA coordinates in the graph, one cdim-tuple per dof */
1727:         {
1728:           Vec               cvec;
1729:           const PetscScalar *coords;
1730:           PetscInt          dof,n,cdim;
1731:           PetscBool         memc = PETSC_TRUE;

1733:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1734:           DMGetCoordinates(dm,&cvec);
1735:           VecGetLocalSize(cvec,&n);
1736:           VecGetBlockSize(cvec,&cdim);
                  /* n becomes the number of coordinate tuples */
1737:           n   /= cdim;
1738:           PetscFree(pcbddc->mat_graph->coords);
1739:           PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1740:           VecGetArrayRead(cvec,&coords);
                  /* a raw copy is used only for real scalars and a single dof per node */
1741: #if defined(PETSC_USE_COMPLEX)
1742:           memc = PETSC_FALSE;
1743: #endif
1744:           if (dof != 1) memc = PETSC_FALSE;
1745:           if (memc) {
1746:             PetscMemcpy(pcbddc->mat_graph->coords,coords,cdim*n*dof*sizeof(PetscReal));
1747:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1748:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1749:             PetscInt  i, b, d;

1751:             for (i=0;i<n;i++) {
1752:               for (b=0;b<dof;b++) {
1753:                 for (d=0;d<cdim;d++) {
1754:                   bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1755:                 }
1756:               }
1757:             }
1758:           }
1759:           VecRestoreArrayRead(cvec,&coords);
1760:           pcbddc->mat_graph->cdim  = cdim;
1761:           pcbddc->mat_graph->cnloc = dof*n;
1762:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1763:         }
1764:         DMDAGetSubdomainCornersIS(dm,&corners);
1765:         MatISGetLocalMat(pc->pmat,&lA);
1766:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1767:         MatISRestoreLocalMat(pc->pmat,&lA);
                /* corners are used only if every process has both the mapping and the corner IS */
1768:         lo   = (PetscBool)(l2l && corners);
1769:         MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1770:         if (gl) { /* From PETSc's DMDA */
1771:           const PetscInt    *idx;
1772:           PetscInt          dof,bs,*idxout,n;

1774:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1775:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1776:           ISGetLocalSize(corners,&n);
1777:           ISGetIndices(corners,&idx);
1778:           if (bs == dof) {
1779:             PetscMalloc1(n,&idxout);
1780:             ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1781:           } else { /* the original DMDA local-to-local map have been modified */
1782:             PetscInt i,d;

                    /* expand each corner node into its dof point indices, then map them in place */
1784:             PetscMalloc1(dof*n,&idxout);
1785:             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1786:             ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);

1788:             bs = 1;
1789:             n *= dof;
1790:           }
1791:           ISRestoreIndices(corners,&idx);
1792:           DMDARestoreSubdomainCornersIS(dm,&corners);
                  /* corners is reused for the mapped indices; the new IS takes ownership of idxout */
1793:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1794:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1795:           ISDestroy(&corners);
1796:           pcbddc->corner_selected  = PETSC_TRUE;
1797:           pcbddc->corner_selection = PETSC_TRUE;
1798:         }
                /* NOTE(review): presumably corners is NULL here after the branch above ran, so
                   this restore only triggers when that branch was skipped - confirm */
1799:         if (corners) {
1800:           DMDARestoreSubdomainCornersIS(dm,&corners);
1801:         }
1802:       }
1803:     }
1804:   }
        /* corner selection is on but no coordinate dimension is stored in the graph:
           compute dof coordinates from the DM, one coordinate component at a time */
1805:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1806:     DM dm;

1808:     MatGetDM(pc->pmat,&dm);
1809:     if (!dm) {
1810:       PCGetDM(pc,&dm);
1811:     }
1812:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1813:       Vec            vcoords;
1814:       PetscSection   section;
1815:       PetscReal      *coords;
1816:       PetscInt       d,cdim,nl,nf,**ctxs;
1817:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1819:       DMGetCoordinateDim(dm,&cdim);
1820:       DMGetSection(dm,&section);
1821:       PetscSectionGetNumFields(section,&nf);
1822:       DMCreateGlobalVector(dm,&vcoords);
1823:       VecGetLocalSize(vcoords,&nl);
1824:       PetscMalloc1(nl*cdim,&coords);
1825:       PetscMalloc2(nf,&funcs,nf,&ctxs);
              /* ctxs[0] owns nf contiguous integers; ctxs[d] points at the d-th of them */
1826:       PetscMalloc1(nf,&ctxs[0]);
1827:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1828:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1829:       for (d=0;d<cdim;d++) {
1830:         PetscInt          i;
1831:         const PetscScalar *v;

                /* every field projects coordinate component d */
1833:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1834:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1835:         VecGetArrayRead(vcoords,&v);
1836:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1837:         VecRestoreArrayRead(vcoords,&v);
1838:       }
1839:       VecDestroy(&vcoords);
1840:       PCSetCoordinates(pc,cdim,nl,coords);
1841:       PetscFree(coords);
1842:       PetscFree(ctxs[0]);
1843:       PetscFree2(funcs,ctxs);
1844:     }
1845:   }
1846:   return(0);
1847: }

1849: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1850: {
1851:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1852:   PetscErrorCode  ierr;
1853:   IS              nis;
1854:   const PetscInt  *idxs;
1855:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1856:   PetscBool       *ld;

1859:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1860:   if (mop == MPI_LAND) {
1861:     /* init rootdata with true */
1862:     ld   = (PetscBool*) matis->sf_rootdata;
1863:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1864:   } else {
1865:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1866:   }
1867:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1868:   ISGetLocalSize(*is,&nd);
1869:   ISGetIndices(*is,&idxs);
1870:   ld   = (PetscBool*) matis->sf_leafdata;
1871:   for (i=0;i<nd;i++)
1872:     if (-1 < idxs[i] && idxs[i] < n)
1873:       ld[idxs[i]] = PETSC_TRUE;
1874:   ISRestoreIndices(*is,&idxs);
1875:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1876:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1877:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1878:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1879:   if (mop == MPI_LAND) {
1880:     PetscMalloc1(nd,&nidxs);
1881:   } else {
1882:     PetscMalloc1(n,&nidxs);
1883:   }
1884:   for (i=0,nnd=0;i<n;i++)
1885:     if (ld[i])
1886:       nidxs[nnd++] = i;
1887:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1888:   ISDestroy(is);
1889:   *is  = nis;
1890:   return(0);
1891: }

1893: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1894: {
1895:   PC_IS             *pcis = (PC_IS*)(pc->data);
1896:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1897:   PetscErrorCode    ierr;

1900:   if (!pcbddc->benign_have_null) {
1901:     return(0);
1902:   }
1903:   if (pcbddc->ChangeOfBasisMatrix) {
1904:     Vec swap;

1906:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1907:     swap = pcbddc->work_change;
1908:     pcbddc->work_change = r;
1909:     r = swap;
1910:   }
1911:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1912:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1913:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1914:   KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1915:   VecSet(z,0.);
1916:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1917:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1918:   if (pcbddc->ChangeOfBasisMatrix) {
1919:     pcbddc->work_change = r;
1920:     VecCopy(z,pcbddc->work_change);
1921:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1922:   }
1923:   return(0);
1924: }

1926: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1927: {
1928:   PCBDDCBenignMatMult_ctx ctx;
1929:   PetscErrorCode          ierr;
1930:   PetscBool               apply_right,apply_left,reset_x;

1933:   MatShellGetContext(A,&ctx);
1934:   if (transpose) {
1935:     apply_right = ctx->apply_left;
1936:     apply_left = ctx->apply_right;
1937:   } else {
1938:     apply_right = ctx->apply_right;
1939:     apply_left = ctx->apply_left;
1940:   }
1941:   reset_x = PETSC_FALSE;
1942:   if (apply_right) {
1943:     const PetscScalar *ax;
1944:     PetscInt          nl,i;

1946:     VecGetLocalSize(x,&nl);
1947:     VecGetArrayRead(x,&ax);
1948:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1949:     VecRestoreArrayRead(x,&ax);
1950:     for (i=0;i<ctx->benign_n;i++) {
1951:       PetscScalar    sum,val;
1952:       const PetscInt *idxs;
1953:       PetscInt       nz,j;
1954:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1955:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1956:       sum = 0.;
1957:       if (ctx->apply_p0) {
1958:         val = ctx->work[idxs[nz-1]];
1959:         for (j=0;j<nz-1;j++) {
1960:           sum += ctx->work[idxs[j]];
1961:           ctx->work[idxs[j]] += val;
1962:         }
1963:       } else {
1964:         for (j=0;j<nz-1;j++) {
1965:           sum += ctx->work[idxs[j]];
1966:         }
1967:       }
1968:       ctx->work[idxs[nz-1]] -= sum;
1969:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1970:     }
1971:     VecPlaceArray(x,ctx->work);
1972:     reset_x = PETSC_TRUE;
1973:   }
1974:   if (transpose) {
1975:     MatMultTranspose(ctx->A,x,y);
1976:   } else {
1977:     MatMult(ctx->A,x,y);
1978:   }
1979:   if (reset_x) {
1980:     VecResetArray(x);
1981:   }
1982:   if (apply_left) {
1983:     PetscScalar *ay;
1984:     PetscInt    i;

1986:     VecGetArray(y,&ay);
1987:     for (i=0;i<ctx->benign_n;i++) {
1988:       PetscScalar    sum,val;
1989:       const PetscInt *idxs;
1990:       PetscInt       nz,j;
1991:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1992:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1993:       val = -ay[idxs[nz-1]];
1994:       if (ctx->apply_p0) {
1995:         sum = 0.;
1996:         for (j=0;j<nz-1;j++) {
1997:           sum += ay[idxs[j]];
1998:           ay[idxs[j]] += val;
1999:         }
2000:         ay[idxs[nz-1]] += sum;
2001:       } else {
2002:         for (j=0;j<nz-1;j++) {
2003:           ay[idxs[j]] += val;
2004:         }
2005:         ay[idxs[nz-1]] = 0.;
2006:       }
2007:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2008:     }
2009:     VecRestoreArray(y,&ay);
2010:   }
2011:   return(0);
2012: }

2014: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2015: {

2019:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2020:   return(0);
2021: }

2023: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2024: {

2028:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2029:   return(0);
2030: }

2032: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2033: {
2034:   PC_IS                   *pcis = (PC_IS*)pc->data;
2035:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2036:   PCBDDCBenignMatMult_ctx ctx;
2037:   PetscErrorCode          ierr;

2040:   if (!restore) {
2041:     Mat                A_IB,A_BI;
2042:     PetscScalar        *work;
2043:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2045:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2046:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2047:     PetscMalloc1(pcis->n,&work);
2048:     MatCreate(PETSC_COMM_SELF,&A_IB);
2049:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2050:     MatSetType(A_IB,MATSHELL);
2051:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2052:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2053:     PetscNew(&ctx);
2054:     MatShellSetContext(A_IB,ctx);
2055:     ctx->apply_left = PETSC_TRUE;
2056:     ctx->apply_right = PETSC_FALSE;
2057:     ctx->apply_p0 = PETSC_FALSE;
2058:     ctx->benign_n = pcbddc->benign_n;
2059:     if (reuse) {
2060:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2061:       ctx->free = PETSC_FALSE;
2062:     } else { /* TODO: could be optimized for successive solves */
2063:       ISLocalToGlobalMapping N_to_D;
2064:       PetscInt               i;

2066:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2067:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2068:       for (i=0;i<pcbddc->benign_n;i++) {
2069:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2070:       }
2071:       ISLocalToGlobalMappingDestroy(&N_to_D);
2072:       ctx->free = PETSC_TRUE;
2073:     }
2074:     ctx->A = pcis->A_IB;
2075:     ctx->work = work;
2076:     MatSetUp(A_IB);
2077:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2078:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2079:     pcis->A_IB = A_IB;

2081:     /* A_BI as A_IB^T */
2082:     MatCreateTranspose(A_IB,&A_BI);
2083:     pcbddc->benign_original_mat = pcis->A_BI;
2084:     pcis->A_BI = A_BI;
2085:   } else {
2086:     if (!pcbddc->benign_original_mat) {
2087:       return(0);
2088:     }
2089:     MatShellGetContext(pcis->A_IB,&ctx);
2090:     MatDestroy(&pcis->A_IB);
2091:     pcis->A_IB = ctx->A;
2092:     ctx->A = NULL;
2093:     MatDestroy(&pcis->A_BI);
2094:     pcis->A_BI = pcbddc->benign_original_mat;
2095:     pcbddc->benign_original_mat = NULL;
2096:     if (ctx->free) {
2097:       PetscInt i;
2098:       for (i=0;i<ctx->benign_n;i++) {
2099:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2100:       }
2101:       PetscFree(ctx->benign_zerodiag_subs);
2102:     }
2103:     PetscFree(ctx->work);
2104:     PetscFree(ctx);
2105:   }
2106:   return(0);
2107: }

2109: /* used just in bddc debug mode */
2110: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2111: {
2112:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2113:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2114:   Mat            An;

2118:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2119:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2120:   if (is1) {
2121:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2122:     MatDestroy(&An);
2123:   } else {
2124:     *B = An;
2125:   }
2126:   return(0);
2127: }

2129: /* TODO: add reuse flag */
2130: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2131: {
2132:   Mat            Bt;
2133:   PetscScalar    *a,*bdata;
2134:   const PetscInt *ii,*ij;
2135:   PetscInt       m,n,i,nnz,*bii,*bij;
2136:   PetscBool      flg_row;

2140:   MatGetSize(A,&n,&m);
2141:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2142:   MatSeqAIJGetArray(A,&a);
2143:   nnz = n;
2144:   for (i=0;i<ii[n];i++) {
2145:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2146:   }
2147:   PetscMalloc1(n+1,&bii);
2148:   PetscMalloc1(nnz,&bij);
2149:   PetscMalloc1(nnz,&bdata);
2150:   nnz = 0;
2151:   bii[0] = 0;
2152:   for (i=0;i<n;i++) {
2153:     PetscInt j;
2154:     for (j=ii[i];j<ii[i+1];j++) {
2155:       PetscScalar entry = a[j];
2156:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2157:         bij[nnz] = ij[j];
2158:         bdata[nnz] = entry;
2159:         nnz++;
2160:       }
2161:     }
2162:     bii[i+1] = nnz;
2163:   }
2164:   MatSeqAIJRestoreArray(A,&a);
2165:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2166:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2167:   {
2168:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2169:     b->free_a = PETSC_TRUE;
2170:     b->free_ij = PETSC_TRUE;
2171:   }
2172:   if (*B == A) {
2173:     MatDestroy(&A);
2174:   }
2175:   *B = Bt;
2176:   return(0);
2177: }

2179: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2180: {
2181:   Mat                    B = NULL;
2182:   DM                     dm;
2183:   IS                     is_dummy,*cc_n;
2184:   ISLocalToGlobalMapping l2gmap_dummy;
2185:   PCBDDCGraph            graph;
2186:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2187:   PetscInt               i,n;
2188:   PetscInt               *xadj,*adjncy;
2189:   PetscBool              isplex = PETSC_FALSE;
2190:   PetscErrorCode         ierr;

2193:   if (ncc) *ncc = 0;
2194:   if (cc) *cc = NULL;
2195:   if (primalv) *primalv = NULL;
2196:   PCBDDCGraphCreate(&graph);
2197:   MatGetDM(pc->pmat,&dm);
2198:   if (!dm) {
2199:     PCGetDM(pc,&dm);
2200:   }
2201:   if (dm) {
2202:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2203:   }
2204:   if (filter) isplex = PETSC_FALSE;

2206:   if (isplex) { /* this code has been modified from plexpartition.c */
2207:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2208:     PetscInt      *adj = NULL;
2209:     IS             cellNumbering;
2210:     const PetscInt *cellNum;
2211:     PetscBool      useCone, useClosure;
2212:     PetscSection   section;
2213:     PetscSegBuffer adjBuffer;
2214:     PetscSF        sfPoint;

2218:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2219:     DMGetPointSF(dm, &sfPoint);
2220:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2221:     /* Build adjacency graph via a section/segbuffer */
2222:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2223:     PetscSectionSetChart(section, pStart, pEnd);
2224:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2225:     /* Always use FVM adjacency to create partitioner graph */
2226:     DMGetBasicAdjacency(dm, &useCone, &useClosure);
2227:     DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2228:     DMPlexGetCellNumbering(dm, &cellNumbering);
2229:     ISGetIndices(cellNumbering, &cellNum);
2230:     for (n = 0, p = pStart; p < pEnd; p++) {
2231:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2232:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2233:       adjSize = PETSC_DETERMINE;
2234:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2235:       for (a = 0; a < adjSize; ++a) {
2236:         const PetscInt point = adj[a];
2237:         if (pStart <= point && point < pEnd) {
2238:           PetscInt *PETSC_RESTRICT pBuf;
2239:           PetscSectionAddDof(section, p, 1);
2240:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2241:           *pBuf = point;
2242:         }
2243:       }
2244:       n++;
2245:     }
2246:     DMSetBasicAdjacency(dm, useCone, useClosure);
2247:     /* Derive CSR graph from section/segbuffer */
2248:     PetscSectionSetUp(section);
2249:     PetscSectionGetStorageSize(section, &size);
2250:     PetscMalloc1(n+1, &xadj);
2251:     for (idx = 0, p = pStart; p < pEnd; p++) {
2252:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2253:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2254:     }
2255:     xadj[n] = size;
2256:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2257:     /* Clean up */
2258:     PetscSegBufferDestroy(&adjBuffer);
2259:     PetscSectionDestroy(&section);
2260:     PetscFree(adj);
2261:     graph->xadj = xadj;
2262:     graph->adjncy = adjncy;
2263:   } else {
2264:     Mat       A;
2265:     PetscBool isseqaij, flg_row;

2267:     MatISGetLocalMat(pc->pmat,&A);
2268:     if (!A->rmap->N || !A->cmap->N) {
2269:       PCBDDCGraphDestroy(&graph);
2270:       return(0);
2271:     }
2272:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2273:     if (!isseqaij && filter) {
2274:       PetscBool isseqdense;

2276:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2277:       if (!isseqdense) {
2278:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2279:       } else { /* TODO: rectangular case and LDA */
2280:         PetscScalar *array;
2281:         PetscReal   chop=1.e-6;

2283:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2284:         MatDenseGetArray(B,&array);
2285:         MatGetSize(B,&n,NULL);
2286:         for (i=0;i<n;i++) {
2287:           PetscInt j;
2288:           for (j=i+1;j<n;j++) {
2289:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2290:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2291:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2292:           }
2293:         }
2294:         MatDenseRestoreArray(B,&array);
2295:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2296:       }
2297:     } else {
2298:       PetscObjectReference((PetscObject)A);
2299:       B = A;
2300:     }
2301:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2303:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2304:     if (filter) {
2305:       PetscScalar *data;
2306:       PetscInt    j,cum;

2308:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2309:       MatSeqAIJGetArray(B,&data);
2310:       cum = 0;
2311:       for (i=0;i<n;i++) {
2312:         PetscInt t;

2314:         for (j=xadj[i];j<xadj[i+1];j++) {
2315:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2316:             continue;
2317:           }
2318:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2319:         }
2320:         t = xadj_filtered[i];
2321:         xadj_filtered[i] = cum;
2322:         cum += t;
2323:       }
2324:       MatSeqAIJRestoreArray(B,&data);
2325:       graph->xadj = xadj_filtered;
2326:       graph->adjncy = adjncy_filtered;
2327:     } else {
2328:       graph->xadj = xadj;
2329:       graph->adjncy = adjncy;
2330:     }
2331:   }
2332:   /* compute local connected components using PCBDDCGraph */
2333:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2334:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2335:   ISDestroy(&is_dummy);
2336:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2337:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2338:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2339:   PCBDDCGraphComputeConnectedComponents(graph);

2341:   /* partial clean up */
2342:   PetscFree2(xadj_filtered,adjncy_filtered);
2343:   if (B) {
2344:     PetscBool flg_row;
2345:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2346:     MatDestroy(&B);
2347:   }
2348:   if (isplex) {
2349:     PetscFree(xadj);
2350:     PetscFree(adjncy);
2351:   }

2353:   /* get back data */
2354:   if (isplex) {
2355:     if (ncc) *ncc = graph->ncc;
2356:     if (cc || primalv) {
2357:       Mat          A;
2358:       PetscBT      btv,btvt;
2359:       PetscSection subSection;
2360:       PetscInt     *ids,cum,cump,*cids,*pids;

2362:       DMPlexGetSubdomainSection(dm,&subSection);
2363:       MatISGetLocalMat(pc->pmat,&A);
2364:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2365:       PetscBTCreate(A->rmap->n,&btv);
2366:       PetscBTCreate(A->rmap->n,&btvt);

2368:       cids[0] = 0;
2369:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2370:         PetscInt j;

2372:         PetscBTMemzero(A->rmap->n,btvt);
2373:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2374:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2376:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2377:           for (k = 0; k < 2*size; k += 2) {
2378:             PetscInt s, p = closure[k], off, dof, cdof;

2380:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2381:             PetscSectionGetOffset(subSection,p,&off);
2382:             PetscSectionGetDof(subSection,p,&dof);
2383:             for (s = 0; s < dof-cdof; s++) {
2384:               if (PetscBTLookupSet(btvt,off+s)) continue;
2385:               if (!PetscBTLookup(btv,off+s)) {
2386:                 ids[cum++] = off+s;
2387:               } else { /* cross-vertex */
2388:                 pids[cump++] = off+s;
2389:               }
2390:             }
2391:           }
2392:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2393:         }
2394:         cids[i+1] = cum;
2395:         /* mark dofs as already assigned */
2396:         for (j = cids[i]; j < cids[i+1]; j++) {
2397:           PetscBTSet(btv,ids[j]);
2398:         }
2399:       }
2400:       if (cc) {
2401:         PetscMalloc1(graph->ncc,&cc_n);
2402:         for (i = 0; i < graph->ncc; i++) {
2403:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2404:         }
2405:         *cc = cc_n;
2406:       }
2407:       if (primalv) {
2408:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2409:       }
2410:       PetscFree3(ids,cids,pids);
2411:       PetscBTDestroy(&btv);
2412:       PetscBTDestroy(&btvt);
2413:     }
2414:   } else {
2415:     if (ncc) *ncc = graph->ncc;
2416:     if (cc) {
2417:       PetscMalloc1(graph->ncc,&cc_n);
2418:       for (i=0;i<graph->ncc;i++) {
2419:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2420:       }
2421:       *cc = cc_n;
2422:     }
2423:   }
2424:   /* clean up graph */
2425:   graph->xadj = 0;
2426:   graph->adjncy = 0;
2427:   PCBDDCGraphDestroy(&graph);
2428:   return(0);
2429: }

2431: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2432: {
2433:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2434:   PC_IS*         pcis = (PC_IS*)(pc->data);
2435:   IS             dirIS = NULL;
2436:   PetscInt       i;

2440:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2441:   if (zerodiag) {
2442:     Mat            A;
2443:     Vec            vec3_N;
2444:     PetscScalar    *vals;
2445:     const PetscInt *idxs;
2446:     PetscInt       nz,*count;

2448:     /* p0 */
2449:     VecSet(pcis->vec1_N,0.);
2450:     PetscMalloc1(pcis->n,&vals);
2451:     ISGetLocalSize(zerodiag,&nz);
2452:     ISGetIndices(zerodiag,&idxs);
2453:     for (i=0;i<nz;i++) vals[i] = 1.;
2454:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2455:     VecAssemblyBegin(pcis->vec1_N);
2456:     VecAssemblyEnd(pcis->vec1_N);
2457:     /* v_I */
2458:     VecSetRandom(pcis->vec2_N,NULL);
2459:     for (i=0;i<nz;i++) vals[i] = 0.;
2460:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2461:     ISRestoreIndices(zerodiag,&idxs);
2462:     ISGetIndices(pcis->is_B_local,&idxs);
2463:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2464:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2465:     ISRestoreIndices(pcis->is_B_local,&idxs);
2466:     if (dirIS) {
2467:       PetscInt n;

2469:       ISGetLocalSize(dirIS,&n);
2470:       ISGetIndices(dirIS,&idxs);
2471:       for (i=0;i<n;i++) vals[i] = 0.;
2472:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2473:       ISRestoreIndices(dirIS,&idxs);
2474:     }
2475:     VecAssemblyBegin(pcis->vec2_N);
2476:     VecAssemblyEnd(pcis->vec2_N);
2477:     VecDuplicate(pcis->vec1_N,&vec3_N);
2478:     VecSet(vec3_N,0.);
2479:     MatISGetLocalMat(pc->pmat,&A);
2480:     MatMult(A,pcis->vec1_N,vec3_N);
2481:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2482:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2483:     PetscFree(vals);
2484:     VecDestroy(&vec3_N);

2486:     /* there should not be any pressure dofs lying on the interface */
2487:     PetscCalloc1(pcis->n,&count);
2488:     ISGetIndices(pcis->is_B_local,&idxs);
2489:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2490:     ISRestoreIndices(pcis->is_B_local,&idxs);
2491:     ISGetIndices(zerodiag,&idxs);
2492:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2493:     ISRestoreIndices(zerodiag,&idxs);
2494:     PetscFree(count);
2495:   }
2496:   ISDestroy(&dirIS);

2498:   /* check PCBDDCBenignGetOrSetP0 */
2499:   VecSetRandom(pcis->vec1_global,NULL);
2500:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2501:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2502:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2503:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2504:   for (i=0;i<pcbddc->benign_n;i++) {
2505:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2506:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2507:   }
2508:   return(0);
2509: }

/*
   PCBDDCBenignDetectSaddlePoint - finds the local zero-diagonal (pressure) dofs of pcbddc->local_mat,
   selects the subsets of them that pass the validity tests below, and builds the local change of basis
   pcbddc->benign_change for those subsets.

   Input Parameters:
+  pc    - the preconditioner (pc->data is accessed as PC_BDDC)
-  reuse - if true, the detection phase is skipped (jump to project_b0) and only the change of basis
           is rebuilt from the subsets already stored in pcbddc->benign_zerodiag_subs

   Output Parameter:
.  zerodiaglocal - the zero-diagonal dofs retained; NULL when reuse is true or when no subset is valid

   Notes:
   In the detection phase it sets pcbddc->benign_n, pcbddc->benign_zerodiag_subs, pcbddc->benign_null
   and pcbddc->benign_have_null (the last two through reductions on the communicator of pc), and it may
   create pcbddc->divudotp.
   After project_b0 it (re)creates pcbddc->benign_change and the benign_p0_lidx/benign_p0_gidx/benign_p0
   arrays and, when pcbddc->benign_change_explicit is set, replaces pcbddc->local_mat with the projected matrix.
*/
2511: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2512: {
2513:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2514:   IS             pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2515:   PetscInt       nz,n,benign_n,bsp = 1;
2516:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2517:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

        /* when reusing, zerodiag stays NULL and only the code after project_b0 is executed */
2521:   if (reuse) goto project_b0;
        /* discard the data of a previous detection */
2522:   PetscSFDestroy(&pcbddc->benign_sf);
2523:   MatDestroy(&pcbddc->benign_B0);
2524:   for (n=0;n<pcbddc->benign_n;n++) {
2525:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2526:   }
2527:   PetscFree(pcbddc->benign_zerodiag_subs);
2528:   has_null_pressures = PETSC_TRUE;
2529:   have_null = PETSC_TRUE;
2530:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2531:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2532:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2533:      If not, a change of basis on pressures is not needed
2534:      since the local Schur complements are already SPD
2535:   */
2536:   if (pcbddc->n_ISForDofsLocal) {
2537:     IS        iP = NULL;
2538:     PetscInt  p,*pp;
2539:     PetscBool flg;

2541:     PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2542:     n    = pcbddc->n_ISForDofsLocal;
2543:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2544:     PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2545:     PetscOptionsEnd();
2546:     if (!flg) {
2547:       n = 1;
2548:       pp[0] = pcbddc->n_ISForDofsLocal-1;
2549:     }

          /* first pass: validate the field ids and sum their block sizes to size bzerodiag */
2551:     bsp = 0;
2552:     for (p=0;p<n;p++) {
2553:       PetscInt bs;

2555:       if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2556:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2557:       bsp += bs;
2558:     }
2559:     PetscMalloc1(bsp,&bzerodiag);
          /* second pass: one IS per component b of each selected field (entries bs*i+b of the field indices) */
2560:     bsp  = 0;
2561:     for (p=0;p<n;p++) {
2562:       const PetscInt *idxs;
2563:       PetscInt       b,bs,npl,*bidxs;

2565:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2566:       ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2567:       ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2568:       PetscMalloc1(npl/bs,&bidxs);
2569:       for (b=0;b<bs;b++) {
2570:         PetscInt i;

2572:         for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2573:         ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2574:         bsp++;
2575:       }
2576:       PetscFree(bidxs);
2577:       ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2578:     }
2579:     ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);

2581:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2582:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2583:     if (iP) {
2584:       IS newpressures;

2586:       ISDifference(pressures,iP,&newpressures);
2587:       ISDestroy(&pressures);
2588:       pressures = newpressures;
2589:     }
2590:     ISSorted(pressures,&sorted);
2591:     if (!sorted) {
2592:       ISSort(pressures);
2593:     }
2594:     PetscFree(pp);
2595:   }

2597:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2598:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2599:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2600:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2601:   ISSorted(zerodiag,&sorted);
2602:   if (!sorted) {
2603:     ISSort(zerodiag);
2604:   }
        /* extra reference: zerodiag may be destroyed or replaced below, zerodiag_save is still used for divudotp */
2605:   PetscObjectReference((PetscObject)zerodiag);
2606:   zerodiag_save = zerodiag;
2607:   ISGetLocalSize(zerodiag,&nz);
2608:   if (!nz) {
2609:     if (n) have_null = PETSC_FALSE;
2610:     has_null_pressures = PETSC_FALSE;
2611:     ISDestroy(&zerodiag);
2612:   }
2613:   recompute_zerodiag = PETSC_FALSE;

2615:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2616:   zerodiag_subs    = NULL;
2617:   benign_n         = 0;
2618:   n_interior_dofs  = 0;
2619:   interior_dofs    = NULL;
2620:   nneu             = 0;
2621:   if (pcbddc->NeumannBoundariesLocal) {
2622:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2623:   }
        /* the B(v_I,p0) test is performed when no Neumann boundary IS was given or when current_level is nonzero */
2624:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2625:   if (checkb) { /* need to compute interior nodes */
2626:     PetscInt n,i,j;
2627:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2628:     PetscInt *iwork;

2630:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2631:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2632:     PetscCalloc1(n,&iwork);
2633:     PetscMalloc1(n,&interior_dofs);
          /* count, for each local dof, the entries of the neighbour lists containing it (entry 0 is skipped,
             presumably because it refers to this process itself - confirm); dofs never counted are interior */
2634:     for (i=1;i<n_neigh;i++)
2635:       for (j=0;j<n_shared[i];j++)
2636:           iwork[shared[i][j]] += 1;
2637:     for (i=0;i<n;i++)
2638:       if (!iwork[i])
2639:         interior_dofs[n_interior_dofs++] = i;
2640:     PetscFree(iwork);
2641:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2642:   }
2643:   if (has_null_pressures) {
2644:     IS             *subs;
2645:     PetscInt       nsubs,i,j,nl;
2646:     const PetscInt *idxs;
2647:     PetscScalar    *array;
2648:     Vec            *work;
2649:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2651:     subs  = pcbddc->local_subs;
2652:     nsubs = pcbddc->n_local_subs;
2653:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2654:     if (checkb) {
2655:       VecDuplicateVecs(matis->y,2,&work);
2656:       ISGetLocalSize(zerodiag,&nl);
2657:       ISGetIndices(zerodiag,&idxs);
2658:       /* work[0] = 1_p */
2659:       VecSet(work[0],0.);
2660:       VecGetArray(work[0],&array);
2661:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2662:       VecRestoreArray(work[0],&array);
2663:       /* work[1] = 1_v */
2664:       VecSet(work[1],1.);
2665:       VecGetArray(work[1],&array);
2666:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2667:       VecRestoreArray(work[1],&array);
2668:       ISRestoreIndices(zerodiag,&idxs);
2669:     }

          /* bsp is the number of pressure components collected above (it is 1 when no local field information is present) */
2671:     if (nsubs > 1 || bsp > 1) {
2672:       IS       *is;
2673:       PetscInt b,totb;

2675:       totb  = bsp;
2676:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2677:       nsubs = PetscMax(nsubs,1);
2678:       PetscCalloc1(nsubs*totb,&zerodiag_subs);
            /* test every (component, subdomain) pair; the valid ones are appended to zerodiag_subs */
2679:       for (b=0;b<totb;b++) {
2680:         for (i=0;i<nsubs;i++) {
2681:           ISLocalToGlobalMapping l2g;
2682:           IS                     t_zerodiag_subs;
2683:           PetscInt               nl;

2685:           if (subs) {
2686:             ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2687:           } else {
2688:             IS tis;

2690:             MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2691:             ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2692:             ISLocalToGlobalMappingCreateIS(tis,&l2g);
2693:             ISDestroy(&tis);
2694:           }
                /* restrict component b to subdomain i (indices become relative to the subdomain numbering) */
2695:           ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2696:           ISGetLocalSize(t_zerodiag_subs,&nl);
2697:           if (nl) {
2698:             PetscBool valid = PETSC_TRUE;

2700:             if (checkb) {
                    /* NOTE(review): subs[i] is accessed without the NULL guard used above when creating l2g;
                       this looks reachable with subs == NULL when bsp > 1 - confirm */
2701:               VecSet(matis->x,0);
2702:               ISGetLocalSize(subs[i],&nl);
2703:               ISGetIndices(subs[i],&idxs);
2704:               VecGetArray(matis->x,&array);
2705:               for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2706:               VecRestoreArray(matis->x,&array);
2707:               ISRestoreIndices(subs[i],&idxs);
                    /* x = indicator of the zero-diagonal dofs in subdomain i; y = A x masked by work[1] */
2708:               VecPointwiseMult(matis->x,work[0],matis->x);
2709:               MatMult(matis->A,matis->x,matis->y);
2710:               VecPointwiseMult(matis->y,work[1],matis->y);
2711:               VecGetArray(matis->y,&array);
                    /* any interior entry above PETSC_SMALL invalidates the subset */
2712:               for (j=0;j<n_interior_dofs;j++) {
2713:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2714:                   valid = PETSC_FALSE;
2715:                   break;
2716:                 }
2717:               }
2718:               VecRestoreArray(matis->y,&array);
2719:             }
2720:             if (valid && nneu) {
2721:               const PetscInt *idxs;
2722:               PetscInt       nzb;

                    /* the subset is rejected if any Neumann dof maps into this subdomain (nzb counts them) */
2724:               ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2725:               ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2726:               ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2727:               if (nzb) valid = PETSC_FALSE;
2728:             }
2729:             if (valid && pressures) {
2730:               IS       t_pressure_subs,tmp;
2731:               PetscInt i1,i2;

                    /* the subset is rejected if the embedding of its zero-diagonal dofs into the pressure dofs
                       of the subdomain has a different size (presumably: some zero-diagonal dof is not a pressure dof - confirm) */
2733:               ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2734:               ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2735:               ISGetLocalSize(tmp,&i1);
2736:               ISGetLocalSize(t_zerodiag_subs,&i2);
2737:               if (i2 != i1) valid = PETSC_FALSE;
2738:               ISDestroy(&t_pressure_subs);
2739:               ISDestroy(&tmp);
2740:             }
2741:             if (valid) {
                    /* store the subset mapped back through l2g */
2742:               ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2743:               benign_n++;
2744:             } else recompute_zerodiag = PETSC_TRUE;
2745:           }
2746:           ISDestroy(&t_zerodiag_subs);
2747:           ISLocalToGlobalMappingDestroy(&l2g);
2748:         }
2749:       }
2750:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2751:       PetscBool valid = PETSC_TRUE;

2753:       if (nneu) valid = PETSC_FALSE;
2754:       if (valid && pressures) {
2755:         ISEqual(pressures,zerodiag,&valid);
2756:       }
2757:       if (valid && checkb) {
2758:         MatMult(matis->A,work[0],matis->x);
2759:         VecPointwiseMult(matis->x,work[1],matis->x);
2760:         VecGetArray(matis->x,&array);
2761:         for (j=0;j<n_interior_dofs;j++) {
2762:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2763:             valid = PETSC_FALSE;
2764:             break;
2765:           }
2766:         }
2767:         VecRestoreArray(matis->x,&array);
2768:       }
2769:       if (valid) {
2770:         benign_n = 1;
2771:         PetscMalloc1(benign_n,&zerodiag_subs);
2772:         PetscObjectReference((PetscObject)zerodiag);
2773:         zerodiag_subs[0] = zerodiag;
2774:       }
2775:     }
2776:     if (checkb) {
2777:       VecDestroyVecs(2,&work);
2778:     }
2779:   }
2780:   PetscFree(interior_dofs);

        /* no valid subset: drop zerodiag (it becomes NULL) */
2782:   if (!benign_n) {
2783:     PetscInt n;

2785:     ISDestroy(&zerodiag);
2786:     recompute_zerodiag = PETSC_FALSE;
2787:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2788:     if (n) have_null = PETSC_FALSE;
2789:   }

2791:   /* final check for null pressures */
2792:   if (zerodiag && pressures) {
2793:     ISEqual(pressures,zerodiag,&have_null);
2794:   }

        /* some subsets were rejected: rebuild zerodiag as the sorted concatenation of the valid ones */
2796:   if (recompute_zerodiag) {
2797:     ISDestroy(&zerodiag);
2798:     if (benign_n == 1) {
2799:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2800:       zerodiag = zerodiag_subs[0];
2801:     } else {
2802:       PetscInt i,nzn,*new_idxs;

2804:       nzn = 0;
2805:       for (i=0;i<benign_n;i++) {
2806:         PetscInt ns;
2807:         ISGetLocalSize(zerodiag_subs[i],&ns);
2808:         nzn += ns;
2809:       }
2810:       PetscMalloc1(nzn,&new_idxs);
2811:       nzn = 0;
2812:       for (i=0;i<benign_n;i++) {
2813:         PetscInt ns,*idxs;
2814:         ISGetLocalSize(zerodiag_subs[i],&ns);
2815:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2816:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2817:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2818:         nzn += ns;
2819:       }
2820:       PetscSortInt(nzn,new_idxs);
2821:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2822:     }
2823:     have_null = PETSC_FALSE;
2824:   }

2826:   /* determines if the coarse solver will be singular or not */
2827:   MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));

2829:   /* Prepare matrix to compute no-net-flux */
2830:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2831:     Mat                    A,loc_divudotp;
2832:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2833:     IS                     row,col,isused = NULL;
2834:     PetscInt               M,N,n,st,n_isused;

          /* rows of divudotp: the pressure dofs if known, otherwise the zero-diagonal dofs found initially */
2836:     if (pressures) {
2837:       isused = pressures;
2838:     } else {
2839:       isused = zerodiag_save;
2840:     }
2841:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2842:     MatISGetLocalMat(pc->pmat,&A);
2843:     MatGetLocalSize(A,&n,NULL);
2844:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2845:     n_isused = 0;
2846:     if (isused) {
2847:       ISGetLocalSize(isused,&n_isused);
2848:     }
          /* MPI_Scan returns the inclusive prefix sum: subtracting the local count gives the first global row of this process */
2849:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2850:     st = st-n_isused;
2851:     if (n) {
2852:       const PetscInt *gidxs;

2854:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2855:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2856:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2857:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2858:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2859:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2860:     } else {
2861:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2862:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2863:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2864:     }
2865:     MatGetSize(pc->pmat,NULL,&N);
2866:     ISGetSize(row,&M);
2867:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2868:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2869:     ISDestroy(&row);
2870:     ISDestroy(&col);
2871:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2872:     MatSetType(pcbddc->divudotp,MATIS);
2873:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2874:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2875:     ISLocalToGlobalMappingDestroy(&rl2g);
2876:     ISLocalToGlobalMappingDestroy(&cl2g);
2877:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2878:     MatDestroy(&loc_divudotp);
2879:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2880:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2881:   }
2882:   ISDestroy(&zerodiag_save);
2883:   ISDestroy(&pressures);
2884:   if (bzerodiag) {
2885:     PetscInt i;

2887:     for (i=0;i<bsp;i++) {
2888:       ISDestroy(&bzerodiag[i]);
2889:     }
2890:     PetscFree(bzerodiag);
2891:   }
2892:   pcbddc->benign_n = benign_n;
2893:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2895:   /* determines if the problem has subdomains with 0 pressure block */
2896:   have_null = (PetscBool)(!!pcbddc->benign_n);
2897:   MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2899: project_b0:
2900:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2901:   /* change of basis and p0 dofs */
2902:   if (pcbddc->benign_n) {
2903:     PetscInt i,s,*nnz;

2905:     /* local change of basis for pressures */
2906:     MatDestroy(&pcbddc->benign_change);
2907:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2908:     MatSetType(pcbddc->benign_change,MATAIJ);
2909:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2910:     PetscMalloc1(n,&nnz);
2911:     for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2912:     for (i=0;i<pcbddc->benign_n;i++) {
2913:       const PetscInt *idxs;
2914:       PetscInt       nzs,j;

2916:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2917:       ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2918:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2919:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2920:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2921:     }
2922:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2923:     MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2924:     PetscFree(nnz);
2925:     /* set identity by default */
2926:     for (i=0;i<n;i++) {
2927:       MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2928:     }
2929:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2930:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2931:     /* set change on pressures */
2932:     for (s=0;s<pcbddc->benign_n;s++) {
2933:       PetscScalar    *array;
2934:       const PetscInt *idxs;
2935:       PetscInt       nzs;

2937:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2938:       ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
            /* row idxs[i] (all but the last dof of the subset): 1 on the diagonal and 1 in the column of the last dof */
2939:       for (i=0;i<nzs-1;i++) {
2940:         PetscScalar vals[2];
2941:         PetscInt    cols[2];

2943:         cols[0] = idxs[i];
2944:         cols[1] = idxs[nzs-1];
2945:         vals[0] = 1.;
2946:         vals[1] = 1.;
2947:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2948:       }
            /* row of the last dof of the subset: -1 in the columns of the other dofs, 1 on the diagonal */
2949:       PetscMalloc1(nzs,&array);
2950:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2951:       array[nzs-1] = 1.;
2952:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2953:       /* store local idxs for p0 */
2954:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2955:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2956:       PetscFree(array);
2957:     }
2958:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2959:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);

2961:     /* project if needed */
2962:     if (pcbddc->benign_change_explicit) {
2963:       Mat M;

            /* local_mat is replaced by the result of MatSeqAIJCompress applied to the MatPtAP product with benign_change */
2965:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2966:       MatDestroy(&pcbddc->local_mat);
2967:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2968:       MatDestroy(&M);
2969:     }
2970:     /* store global idxs for p0 */
2971:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2972:   }
2973:   *zerodiaglocal = zerodiag;
2974:   return(0);
2975: }

2977: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2978: {
2979:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2980:   PetscScalar    *array;

2984:   if (!pcbddc->benign_sf) {
2985:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2986:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2987:   }
2988:   if (get) {
2989:     VecGetArrayRead(v,(const PetscScalar**)&array);
2990:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2991:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2992:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2993:   } else {
2994:     VecGetArray(v,&array);
2995:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2996:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2997:     VecRestoreArray(v,&array);
2998:   }
2999:   return(0);
3000: }

3002: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3003: {
3004:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

3008:   /* TODO: add error checking
3009:     - avoid nested pop (or push) calls.
3010:     - cannot push before pop.
3011:     - cannot call this if pcbddc->local_mat is NULL
3012:   */
3013:   if (!pcbddc->benign_n) {
3014:     return(0);
3015:   }
3016:   if (pop) {
3017:     if (pcbddc->benign_change_explicit) {
3018:       IS       is_p0;
3019:       MatReuse reuse;

3021:       /* extract B_0 */
3022:       reuse = MAT_INITIAL_MATRIX;
3023:       if (pcbddc->benign_B0) {
3024:         reuse = MAT_REUSE_MATRIX;
3025:       }
3026:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3027:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3028:       /* remove rows and cols from local problem */
3029:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3030:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3031:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3032:       ISDestroy(&is_p0);
3033:     } else {
3034:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
3035:       PetscScalar *vals;
3036:       PetscInt    i,n,*idxs_ins;

3038:       VecGetLocalSize(matis->y,&n);
3039:       PetscMalloc2(n,&idxs_ins,n,&vals);
3040:       if (!pcbddc->benign_B0) {
3041:         PetscInt *nnz;
3042:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3043:         MatSetType(pcbddc->benign_B0,MATAIJ);
3044:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3045:         PetscMalloc1(pcbddc->benign_n,&nnz);
3046:         for (i=0;i<pcbddc->benign_n;i++) {
3047:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3048:           nnz[i] = n - nnz[i];
3049:         }
3050:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3051:         MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3052:         PetscFree(nnz);
3053:       }

3055:       for (i=0;i<pcbddc->benign_n;i++) {
3056:         PetscScalar *array;
3057:         PetscInt    *idxs,j,nz,cum;

3059:         VecSet(matis->x,0.);
3060:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3061:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3062:         for (j=0;j<nz;j++) vals[j] = 1.;
3063:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3064:         VecAssemblyBegin(matis->x);
3065:         VecAssemblyEnd(matis->x);
3066:         VecSet(matis->y,0.);
3067:         MatMult(matis->A,matis->x,matis->y);
3068:         VecGetArray(matis->y,&array);
3069:         cum = 0;
3070:         for (j=0;j<n;j++) {
3071:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3072:             vals[cum] = array[j];
3073:             idxs_ins[cum] = j;
3074:             cum++;
3075:           }
3076:         }
3077:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3078:         VecRestoreArray(matis->y,&array);
3079:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3080:       }
3081:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3082:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3083:       PetscFree2(idxs_ins,vals);
3084:     }
3085:   } else { /* push */
3086:     if (pcbddc->benign_change_explicit) {
3087:       PetscInt i;

3089:       for (i=0;i<pcbddc->benign_n;i++) {
3090:         PetscScalar *B0_vals;
3091:         PetscInt    *B0_cols,B0_ncol;

3093:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3094:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3095:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3096:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3097:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3098:       }
3099:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3100:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3101:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3102:   }
3103:   return(0);
3104: }

3106: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3107: {
3108:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3109:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3110:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3111:   PetscBLASInt    *B_iwork,*B_ifail;
3112:   PetscScalar     *work,lwork;
3113:   PetscScalar     *St,*S,*eigv;
3114:   PetscScalar     *Sarray,*Starray;
3115:   PetscReal       *eigs,thresh,lthresh,uthresh;
3116:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3117:   PetscBool       allocated_S_St;
3118: #if defined(PETSC_USE_COMPLEX)
3119:   PetscReal       *rwork;
3120: #endif
3121:   PetscErrorCode  ierr;

3124:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3125:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3126:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3127:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);

3129:   if (pcbddc->dbg_flag) {
3130:     PetscViewerFlush(pcbddc->dbg_viewer);
3131:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3132:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3133:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3134:   }

3136:   if (pcbddc->dbg_flag) {
3137:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3138:   }

3140:   /* max size of subsets */
3141:   mss = 0;
3142:   for (i=0;i<sub_schurs->n_subs;i++) {
3143:     PetscInt subset_size;

3145:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3146:     mss = PetscMax(mss,subset_size);
3147:   }

3149:   /* min/max and threshold */
3150:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3151:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3152:   nmax = PetscMax(nmin,nmax);
3153:   allocated_S_St = PETSC_FALSE;
3154:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3155:     allocated_S_St = PETSC_TRUE;
3156:   }

3158:   /* allocate lapack workspace */
3159:   cum = cum2 = 0;
3160:   maxneigs = 0;
3161:   for (i=0;i<sub_schurs->n_subs;i++) {
3162:     PetscInt n,subset_size;

3164:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3165:     n = PetscMin(subset_size,nmax);
3166:     cum += subset_size;
3167:     cum2 += subset_size*n;
3168:     maxneigs = PetscMax(maxneigs,n);
3169:   }
3170:   if (mss) {
3171:     if (sub_schurs->is_symmetric) {
3172:       PetscBLASInt B_itype = 1;
3173:       PetscBLASInt B_N = mss;
3174:       PetscReal    zero = 0.0;
3175:       PetscReal    eps = 0.0; /* dlamch? */

3177:       B_lwork = -1;
3178:       S = NULL;
3179:       St = NULL;
3180:       eigs = NULL;
3181:       eigv = NULL;
3182:       B_iwork = NULL;
3183:       B_ifail = NULL;
3184: #if defined(PETSC_USE_COMPLEX)
3185:       rwork = NULL;
3186: #endif
3187:       thresh = 1.0;
3188:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3189: #if defined(PETSC_USE_COMPLEX)
3190:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3191: #else
3192:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3193: #endif
3194:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3195:       PetscFPTrapPop();
3196:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3197:   } else {
3198:     lwork = 0;
3199:   }

3201:   nv = 0;
3202:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3203:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3204:   }
3205:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3206:   if (allocated_S_St) {
3207:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3208:   }
3209:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3210: #if defined(PETSC_USE_COMPLEX)
3211:   PetscMalloc1(7*mss,&rwork);
3212: #endif
3213:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3214:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3215:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3216:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3217:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3218:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3220:   maxneigs = 0;
3221:   cum = cumarray = 0;
3222:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3223:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3224:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3225:     const PetscInt *idxs;

3227:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3228:     for (cum=0;cum<nv;cum++) {
3229:       pcbddc->adaptive_constraints_n[cum] = 1;
3230:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3231:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3232:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3233:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3234:     }
3235:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3236:   }

3238:   if (mss) { /* multilevel */
3239:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3240:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3241:   }

3243:   lthresh = pcbddc->adaptive_threshold[0];
3244:   uthresh = pcbddc->adaptive_threshold[1];
3245:   for (i=0;i<sub_schurs->n_subs;i++) {
3246:     const PetscInt *idxs;
3247:     PetscReal      upper,lower;
3248:     PetscInt       j,subset_size,eigs_start = 0;
3249:     PetscBLASInt   B_N;
3250:     PetscBool      same_data = PETSC_FALSE;
3251:     PetscBool      scal = PETSC_FALSE;

3253:     if (pcbddc->use_deluxe_scaling) {
3254:       upper = PETSC_MAX_REAL;
3255:       lower = uthresh;
3256:     } else {
3257:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3258:       upper = 1./uthresh;
3259:       lower = 0.;
3260:     }
3261:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3262:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3263:     PetscBLASIntCast(subset_size,&B_N);
3264:     /* this is experimental: we assume the dofs have been properly grouped to have
3265:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3266:     if (!sub_schurs->is_posdef) {
3267:       Mat T;

3269:       for (j=0;j<subset_size;j++) {
3270:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3271:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3272:           MatScale(T,-1.0);
3273:           MatDestroy(&T);
3274:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3275:           MatScale(T,-1.0);
3276:           MatDestroy(&T);
3277:           if (sub_schurs->change_primal_sub) {
3278:             PetscInt       nz,k;
3279:             const PetscInt *idxs;

3281:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3282:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3283:             for (k=0;k<nz;k++) {
3284:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3285:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3286:             }
3287:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3288:           }
3289:           scal = PETSC_TRUE;
3290:           break;
3291:         }
3292:       }
3293:     }

3295:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3296:       if (sub_schurs->is_symmetric) {
3297:         PetscInt j,k;
3298:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3299:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3300:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3301:         }
3302:         for (j=0;j<subset_size;j++) {
3303:           for (k=j;k<subset_size;k++) {
3304:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3305:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3306:           }
3307:         }
3308:       } else {
3309:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3310:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3311:       }
3312:     } else {
3313:       S = Sarray + cumarray;
3314:       St = Starray + cumarray;
3315:     }
3316:     /* see if we can save some work */
3317:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3318:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3319:     }

3321:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3322:       B_neigs = 0;
3323:     } else {
3324:       if (sub_schurs->is_symmetric) {
3325:         PetscBLASInt B_itype = 1;
3326:         PetscBLASInt B_IL, B_IU;
3327:         PetscReal    eps = -1.0; /* dlamch? */
3328:         PetscInt     nmin_s;
3329:         PetscBool    compute_range;

3331:         B_neigs = 0;
3332:         compute_range = (PetscBool)!same_data;
3333:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3335:         if (pcbddc->dbg_flag) {
3336:           PetscInt nc = 0;

3338:           if (sub_schurs->change_primal_sub) {
3339:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3340:           }
3341:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3342:         }

3344:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3345:         if (compute_range) {

3347:           /* ask for eigenvalues larger than thresh */
3348:           if (sub_schurs->is_posdef) {
3349: #if defined(PETSC_USE_COMPLEX)
3350:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3351: #else
3352:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3353: #endif
3354:             PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3355:           } else { /* no theory so far, but it works nicely */
3356:             PetscInt  recipe = 0,recipe_m = 1;
3357:             PetscReal bb[2];

3359:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3360:             switch (recipe) {
3361:             case 0:
3362:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3363:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3364: #if defined(PETSC_USE_COMPLEX)
3365:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3366: #else
3367:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3368: #endif
3369:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3370:               break;
3371:             case 1:
3372:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3373: #if defined(PETSC_USE_COMPLEX)
3374:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3375: #else
3376:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3377: #endif
3378:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3379:               if (!scal) {
3380:                 PetscBLASInt B_neigs2 = 0;

3382:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3383:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3384:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3385: #if defined(PETSC_USE_COMPLEX)
3386:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3387: #else
3388:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3389: #endif
3390:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3391:                 B_neigs += B_neigs2;
3392:               }
3393:               break;
3394:             case 2:
3395:               if (scal) {
3396:                 bb[0] = PETSC_MIN_REAL;
3397:                 bb[1] = 0;
3398: #if defined(PETSC_USE_COMPLEX)
3399:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3400: #else
3401:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3402: #endif
3403:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3404:               } else {
3405:                 PetscBLASInt B_neigs2 = 0;
3406:                 PetscBool    import = PETSC_FALSE;

3408:                 lthresh = PetscMax(lthresh,0.0);
3409:                 if (lthresh > 0.0) {
3410:                   bb[0] = PETSC_MIN_REAL;
3411:                   bb[1] = lthresh*lthresh;

3413:                   import = PETSC_TRUE;
3414: #if defined(PETSC_USE_COMPLEX)
3415:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3416: #else
3417:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3418: #endif
3419:                   PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3420:                 }
3421:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3422:                 bb[1] = PETSC_MAX_REAL;
3423:                 if (import) {
3424:                   PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3425:                   PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3426:                 }
3427: #if defined(PETSC_USE_COMPLEX)
3428:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3429: #else
3430:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3431: #endif
3432:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3433:                 B_neigs += B_neigs2;
3434:               }
3435:               break;
3436:             case 3:
3437:               if (scal) {
3438:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3439:               } else {
3440:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3441:               }
3442:               if (!scal) {
3443:                 bb[0] = uthresh;
3444:                 bb[1] = PETSC_MAX_REAL;
3445: #if defined(PETSC_USE_COMPLEX)
3446:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3447: #else
3448:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3449: #endif
3450:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3451:               }
3452:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3453:                 PetscBLASInt B_neigs2 = 0;

3455:                 B_IL = 1;
3456:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3457:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3458:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3459: #if defined(PETSC_USE_COMPLEX)
3460:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3461: #else
3462:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3463: #endif
3464:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3465:                 B_neigs += B_neigs2;
3466:               }
3467:               break;
3468:             case 4:
3469:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3470: #if defined(PETSC_USE_COMPLEX)
3471:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3472: #else
3473:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3474: #endif
3475:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3476:               {
3477:                 PetscBLASInt B_neigs2 = 0;

3479:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3480:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3481:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3482: #if defined(PETSC_USE_COMPLEX)
3483:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3484: #else
3485:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3486: #endif
3487:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3488:                 B_neigs += B_neigs2;
3489:               }
3490:               break;
3491:             case 5: /* same as before: first compute all eigenvalues, then filter */
3492: #if defined(PETSC_USE_COMPLEX)
3493:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3494: #else
3495:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3496: #endif
3497:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3498:               {
3499:                 PetscInt e,k,ne;
3500:                 for (e=0,ne=0;e<B_neigs;e++) {
3501:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3502:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3503:                     eigs[ne] = eigs[e];
3504:                     ne++;
3505:                   }
3506:                 }
3507:                 PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3508:                 B_neigs = ne;
3509:               }
3510:               break;
3511:             default:
3512:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3513:               break;
3514:             }
3515:           }
3516:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3517:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3518:           B_IL = 1;
3519: #if defined(PETSC_USE_COMPLEX)
3520:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3521: #else
3522:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3523: #endif
3524:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3525:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3526:           PetscInt k;
3527:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3528:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3529:           PetscBLASIntCast(nmax,&B_neigs);
3530:           nmin = nmax;
3531:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3532:           for (k=0;k<nmax;k++) {
3533:             eigs[k] = 1./PETSC_SMALL;
3534:             eigv[k*(subset_size+1)] = 1.0;
3535:           }
3536:         }
3537:         PetscFPTrapPop();
3538:         if (B_ierr) {
3539:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3540:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3541:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3542:         }

3544:         if (B_neigs > nmax) {
3545:           if (pcbddc->dbg_flag) {
3546:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3547:           }
3548:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3549:           B_neigs = nmax;
3550:         }

3552:         nmin_s = PetscMin(nmin,B_N);
3553:         if (B_neigs < nmin_s) {
3554:           PetscBLASInt B_neigs2 = 0;

3556:           if (pcbddc->use_deluxe_scaling) {
3557:             if (scal) {
3558:               B_IU = nmin_s;
3559:               B_IL = B_neigs + 1;
3560:             } else {
3561:               B_IL = B_N - nmin_s + 1;
3562:               B_IU = B_N - B_neigs;
3563:             }
3564:           } else {
3565:             B_IL = B_neigs + 1;
3566:             B_IU = nmin_s;
3567:           }
3568:           if (pcbddc->dbg_flag) {
3569:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3570:           }
3571:           if (sub_schurs->is_symmetric) {
3572:             PetscInt j,k;
3573:             for (j=0;j<subset_size;j++) {
3574:               for (k=j;k<subset_size;k++) {
3575:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3576:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3577:               }
3578:             }
3579:           } else {
3580:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3581:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3582:           }
3583:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3584: #if defined(PETSC_USE_COMPLEX)
3585:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3586: #else
3587:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3588: #endif
3589:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3590:           PetscFPTrapPop();
3591:           B_neigs += B_neigs2;
3592:         }
3593:         if (B_ierr) {
3594:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3595:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3596:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3597:         }
3598:         if (pcbddc->dbg_flag) {
3599:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3600:           for (j=0;j<B_neigs;j++) {
3601:             if (eigs[j] == 0.0) {
3602:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3603:             } else {
3604:               if (pcbddc->use_deluxe_scaling) {
3605:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3606:               } else {
3607:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3608:               }
3609:             }
3610:           }
3611:         }
3612:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3613:     }
3614:     /* change the basis back to the original one */
3615:     if (sub_schurs->change) {
3616:       Mat change,phi,phit;

3618:       if (pcbddc->dbg_flag > 2) {
3619:         PetscInt ii;
3620:         for (ii=0;ii<B_neigs;ii++) {
3621:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3622:           for (j=0;j<B_N;j++) {
3623: #if defined(PETSC_USE_COMPLEX)
3624:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3625:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3626:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3627: #else
3628:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3629: #endif
3630:           }
3631:         }
3632:       }
3633:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3634:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3635:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3636:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3637:       MatDestroy(&phit);
3638:       MatDestroy(&phi);
3639:     }
3640:     maxneigs = PetscMax(B_neigs,maxneigs);
3641:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3642:     if (B_neigs) {
3643:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3645:       if (pcbddc->dbg_flag > 1) {
3646:         PetscInt ii;
3647:         for (ii=0;ii<B_neigs;ii++) {
3648:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3649:           for (j=0;j<B_N;j++) {
3650: #if defined(PETSC_USE_COMPLEX)
3651:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3652:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3653:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3654: #else
3655:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3656: #endif
3657:           }
3658:         }
3659:       }
3660:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3661:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3662:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3663:       cum++;
3664:     }
3665:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3666:     /* shift for next computation */
3667:     cumarray += subset_size*subset_size;
3668:   }
3669:   if (pcbddc->dbg_flag) {
3670:     PetscViewerFlush(pcbddc->dbg_viewer);
3671:   }

3673:   if (mss) {
3674:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3675:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3676:     /* destroy matrices (junk) */
3677:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3678:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3679:   }
3680:   if (allocated_S_St) {
3681:     PetscFree2(S,St);
3682:   }
3683:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3684: #if defined(PETSC_USE_COMPLEX)
3685:   PetscFree(rwork);
3686: #endif
3687:   if (pcbddc->dbg_flag) {
3688:     PetscInt maxneigs_r;
3689:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3690:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3691:   }
3692:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3693:   return(0);
3694: }

3696: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3697: {
3698:   PetscScalar    *coarse_submat_vals;

3702:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3703:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3704:   PCBDDCSetUpLocalScatters(pc);

3706:   /* Setup local neumann solver ksp_R */
3707:   /* PCBDDCSetUpLocalScatters should be called first! */
3708:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3710:   /*
3711:      Setup local correction and local part of coarse basis.
3712:      Gives back the dense local part of the coarse matrix in column major ordering
3713:   */
3714:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3716:   /* Compute total number of coarse nodes and setup coarse solver */
3717:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3719:   /* free */
3720:   PetscFree(coarse_submat_vals);
3721:   return(0);
3722: }

3724: PetscErrorCode PCBDDCResetCustomization(PC pc)
3725: {
3726:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3730:   ISDestroy(&pcbddc->user_primal_vertices);
3731:   ISDestroy(&pcbddc->user_primal_vertices_local);
3732:   ISDestroy(&pcbddc->NeumannBoundaries);
3733:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3734:   ISDestroy(&pcbddc->DirichletBoundaries);
3735:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3736:   PetscFree(pcbddc->onearnullvecs_state);
3737:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3738:   PCBDDCSetDofsSplitting(pc,0,NULL);
3739:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3740:   return(0);
3741: }

3743: PetscErrorCode PCBDDCResetTopography(PC pc)
3744: {
3745:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3746:   PetscInt       i;

3750:   MatDestroy(&pcbddc->nedcG);
3751:   ISDestroy(&pcbddc->nedclocal);
3752:   MatDestroy(&pcbddc->discretegradient);
3753:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3754:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3755:   MatDestroy(&pcbddc->switch_static_change);
3756:   VecDestroy(&pcbddc->work_change);
3757:   MatDestroy(&pcbddc->ConstraintMatrix);
3758:   MatDestroy(&pcbddc->divudotp);
3759:   ISDestroy(&pcbddc->divudotp_vl2l);
3760:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3761:   for (i=0;i<pcbddc->n_local_subs;i++) {
3762:     ISDestroy(&pcbddc->local_subs[i]);
3763:   }
3764:   pcbddc->n_local_subs = 0;
3765:   PetscFree(pcbddc->local_subs);
3766:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3767:   pcbddc->graphanalyzed        = PETSC_FALSE;
3768:   pcbddc->recompute_topography = PETSC_TRUE;
3769:   pcbddc->corner_selected      = PETSC_FALSE;
3770:   return(0);
3771: }

3773: PetscErrorCode PCBDDCResetSolvers(PC pc)
3774: {
3775:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3779:   VecDestroy(&pcbddc->coarse_vec);
3780:   if (pcbddc->coarse_phi_B) {
3781:     PetscScalar *array;
3782:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3783:     PetscFree(array);
3784:   }
3785:   MatDestroy(&pcbddc->coarse_phi_B);
3786:   MatDestroy(&pcbddc->coarse_phi_D);
3787:   MatDestroy(&pcbddc->coarse_psi_B);
3788:   MatDestroy(&pcbddc->coarse_psi_D);
3789:   VecDestroy(&pcbddc->vec1_P);
3790:   VecDestroy(&pcbddc->vec1_C);
3791:   MatDestroy(&pcbddc->local_auxmat2);
3792:   MatDestroy(&pcbddc->local_auxmat1);
3793:   VecDestroy(&pcbddc->vec1_R);
3794:   VecDestroy(&pcbddc->vec2_R);
3795:   ISDestroy(&pcbddc->is_R_local);
3796:   VecScatterDestroy(&pcbddc->R_to_B);
3797:   VecScatterDestroy(&pcbddc->R_to_D);
3798:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3799:   KSPReset(pcbddc->ksp_D);
3800:   KSPReset(pcbddc->ksp_R);
3801:   KSPReset(pcbddc->coarse_ksp);
3802:   MatDestroy(&pcbddc->local_mat);
3803:   PetscFree(pcbddc->primal_indices_local_idxs);
3804:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3805:   PetscFree(pcbddc->global_primal_indices);
3806:   ISDestroy(&pcbddc->coarse_subassembling);
3807:   MatDestroy(&pcbddc->benign_change);
3808:   VecDestroy(&pcbddc->benign_vec);
3809:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3810:   MatDestroy(&pcbddc->benign_B0);
3811:   PetscSFDestroy(&pcbddc->benign_sf);
3812:   if (pcbddc->benign_zerodiag_subs) {
3813:     PetscInt i;
3814:     for (i=0;i<pcbddc->benign_n;i++) {
3815:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3816:     }
3817:     PetscFree(pcbddc->benign_zerodiag_subs);
3818:   }
3819:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3820:   return(0);
3821: }

3823: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3824: {
3825:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3826:   PC_IS          *pcis = (PC_IS*)pc->data;
3827:   VecType        impVecType;
3828:   PetscInt       n_constraints,n_R,old_size;

3832:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3833:   n_R = pcis->n - pcbddc->n_vertices;
3834:   VecGetType(pcis->vec1_N,&impVecType);
3835:   /* local work vectors (try to avoid unneeded work)*/
3836:   /* R nodes */
3837:   old_size = -1;
3838:   if (pcbddc->vec1_R) {
3839:     VecGetSize(pcbddc->vec1_R,&old_size);
3840:   }
3841:   if (n_R != old_size) {
3842:     VecDestroy(&pcbddc->vec1_R);
3843:     VecDestroy(&pcbddc->vec2_R);
3844:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3845:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3846:     VecSetType(pcbddc->vec1_R,impVecType);
3847:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3848:   }
3849:   /* local primal dofs */
3850:   old_size = -1;
3851:   if (pcbddc->vec1_P) {
3852:     VecGetSize(pcbddc->vec1_P,&old_size);
3853:   }
3854:   if (pcbddc->local_primal_size != old_size) {
3855:     VecDestroy(&pcbddc->vec1_P);
3856:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3857:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3858:     VecSetType(pcbddc->vec1_P,impVecType);
3859:   }
3860:   /* local explicit constraints */
3861:   old_size = -1;
3862:   if (pcbddc->vec1_C) {
3863:     VecGetSize(pcbddc->vec1_C,&old_size);
3864:   }
3865:   if (n_constraints && n_constraints != old_size) {
3866:     VecDestroy(&pcbddc->vec1_C);
3867:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3868:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3869:     VecSetType(pcbddc->vec1_C,impVecType);
3870:   }
3871:   return(0);
3872: }

3874: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3875: {
3876:   PetscErrorCode  ierr;
3877:   /* pointers to pcis and pcbddc */
3878:   PC_IS*          pcis = (PC_IS*)pc->data;
3879:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3880:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3881:   /* submatrices of local problem */
3882:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3883:   /* submatrices of local coarse problem */
3884:   Mat             S_VV,S_CV,S_VC,S_CC;
3885:   /* working matrices */
3886:   Mat             C_CR;
3887:   /* additional working stuff */
3888:   PC              pc_R;
3889:   Mat             F,Brhs = NULL;
3890:   Vec             dummy_vec;
3891:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3892:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3893:   PetscScalar     *work;
3894:   PetscInt        *idx_V_B;
3895:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3896:   PetscInt        i,n_R,n_D,n_B;

3898:   /* some shortcuts to scalars */
3899:   PetscScalar     one=1.0,m_one=-1.0;

3902:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3903:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

3905:   /* Set Non-overlapping dimensions */
3906:   n_vertices = pcbddc->n_vertices;
3907:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3908:   n_B = pcis->n_B;
3909:   n_D = pcis->n - n_B;
3910:   n_R = pcis->n - n_vertices;

3912:   /* vertices in boundary numbering */
3913:   PetscMalloc1(n_vertices,&idx_V_B);
3914:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3915:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);

3917:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3918:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3919:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3920:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3921:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3922:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3923:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3924:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3925:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3926:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3928:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3929:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3930:   PCSetUp(pc_R);
3931:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3932:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3933:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3934:   lda_rhs = n_R;
3935:   need_benign_correction = PETSC_FALSE;
3936:   if (isLU || isILU || isCHOL) {
3937:     PCFactorGetMatrix(pc_R,&F);
3938:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3939:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3940:     MatFactorType      type;

3942:     F = reuse_solver->F;
3943:     MatGetFactorType(F,&type);
3944:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3945:     MatGetSize(F,&lda_rhs,NULL);
3946:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3947:   } else {
3948:     F = NULL;
3949:   }

3951:   /* determine if we can use a sparse right-hand side */
3952:   sparserhs = PETSC_FALSE;
3953:   if (F) {
3954:     MatSolverType solver;

3956:     MatFactorGetSolverType(F,&solver);
3957:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3958:   }

3960:   /* allocate workspace */
3961:   n = 0;
3962:   if (n_constraints) {
3963:     n += lda_rhs*n_constraints;
3964:   }
3965:   if (n_vertices) {
3966:     n = PetscMax(2*lda_rhs*n_vertices,n);
3967:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3968:   }
3969:   if (!pcbddc->symmetric_primal) {
3970:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3971:   }
3972:   PetscMalloc1(n,&work);

3974:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3975:   dummy_vec = NULL;
3976:   if (need_benign_correction && lda_rhs != n_R && F) {
3977:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3978:     VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3979:     VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3980:   }

3982:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3983:   if (n_constraints) {
3984:     Mat         M3,C_B;
3985:     IS          is_aux;
3986:     PetscScalar *array,*array2;

3988:     MatDestroy(&pcbddc->local_auxmat1);
3989:     MatDestroy(&pcbddc->local_auxmat2);

3991:     /* Extract constraints on R nodes: C_{CR}  */
3992:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3993:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3994:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3996:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3997:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3998:     if (!sparserhs) {
3999:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
4000:       for (i=0;i<n_constraints;i++) {
4001:         const PetscScalar *row_cmat_values;
4002:         const PetscInt    *row_cmat_indices;
4003:         PetscInt          size_of_constraint,j;

4005:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4006:         for (j=0;j<size_of_constraint;j++) {
4007:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4008:         }
4009:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4010:       }
4011:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4012:     } else {
4013:       Mat tC_CR;

4015:       MatScale(C_CR,-1.0);
4016:       if (lda_rhs != n_R) {
4017:         PetscScalar *aa;
4018:         PetscInt    r,*ii,*jj;
4019:         PetscBool   done;

4021:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4022:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4023:         MatSeqAIJGetArray(C_CR,&aa);
4024:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4025:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4026:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4027:       } else {
4028:         PetscObjectReference((PetscObject)C_CR);
4029:         tC_CR = C_CR;
4030:       }
4031:       MatCreateTranspose(tC_CR,&Brhs);
4032:       MatDestroy(&tC_CR);
4033:     }
4034:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4035:     if (F) {
4036:       if (need_benign_correction) {
4037:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4039:         /* rhs is already zero on interior dofs, no need to change the rhs */
4040:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
4041:       }
4042:       MatMatSolve(F,Brhs,local_auxmat2_R);
4043:       if (need_benign_correction) {
4044:         PetscScalar        *marr;
4045:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4047:         MatDenseGetArray(local_auxmat2_R,&marr);
4048:         if (lda_rhs != n_R) {
4049:           for (i=0;i<n_constraints;i++) {
4050:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4051:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4052:             VecResetArray(dummy_vec);
4053:           }
4054:         } else {
4055:           for (i=0;i<n_constraints;i++) {
4056:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4057:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4058:             VecResetArray(pcbddc->vec1_R);
4059:           }
4060:         }
4061:         MatDenseRestoreArray(local_auxmat2_R,&marr);
4062:       }
4063:     } else {
4064:       PetscScalar *marr;

4066:       MatDenseGetArray(local_auxmat2_R,&marr);
4067:       for (i=0;i<n_constraints;i++) {
4068:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4069:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4070:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4071:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4072:         VecResetArray(pcbddc->vec1_R);
4073:         VecResetArray(pcbddc->vec2_R);
4074:       }
4075:       MatDenseRestoreArray(local_auxmat2_R,&marr);
4076:     }
4077:     if (sparserhs) {
4078:       MatScale(C_CR,-1.0);
4079:     }
4080:     MatDestroy(&Brhs);
4081:     if (!pcbddc->switch_static) {
4082:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4083:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
4084:       MatDenseGetArray(local_auxmat2_R,&array2);
4085:       for (i=0;i<n_constraints;i++) {
4086:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4087:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
4088:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4089:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4090:         VecResetArray(pcis->vec1_B);
4091:         VecResetArray(pcbddc->vec1_R);
4092:       }
4093:       MatDenseRestoreArray(local_auxmat2_R,&array2);
4094:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4095:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4096:     } else {
4097:       if (lda_rhs != n_R) {
4098:         IS dummy;

4100:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4101:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4102:         ISDestroy(&dummy);
4103:       } else {
4104:         PetscObjectReference((PetscObject)local_auxmat2_R);
4105:         pcbddc->local_auxmat2 = local_auxmat2_R;
4106:       }
4107:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4108:     }
4109:     ISDestroy(&is_aux);
4110:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
4111:     MatScale(M3,m_one);
4112:     if (isCHOL) {
4113:       MatCholeskyFactor(M3,NULL,NULL);
4114:     } else {
4115:       MatLUFactor(M3,NULL,NULL,NULL);
4116:     }
4117:     MatSeqDenseInvertFactors_Private(M3);
4118:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4119:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4120:     MatDestroy(&C_B);
4121:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4122:     MatDestroy(&M3);
4123:   }

4125:   /* Get submatrices from subdomain matrix */
4126:   if (n_vertices) {
4127:     IS        is_aux;
4128:     PetscBool isseqaij;

4130:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4131:       IS tis;

4133:       ISDuplicate(pcbddc->is_R_local,&tis);
4134:       ISSort(tis);
4135:       ISComplement(tis,0,pcis->n,&is_aux);
4136:       ISDestroy(&tis);
4137:     } else {
4138:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4139:     }
4140:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4141:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4142:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4143:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4144:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4145:     }
4146:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4147:     ISDestroy(&is_aux);
4148:   }

4150:   /* Matrix of coarse basis functions (local) */
4151:   if (pcbddc->coarse_phi_B) {
4152:     PetscInt on_B,on_primal,on_D=n_D;
4153:     if (pcbddc->coarse_phi_D) {
4154:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4155:     }
4156:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4157:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4158:       PetscScalar *marray;

4160:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4161:       PetscFree(marray);
4162:       MatDestroy(&pcbddc->coarse_phi_B);
4163:       MatDestroy(&pcbddc->coarse_psi_B);
4164:       MatDestroy(&pcbddc->coarse_phi_D);
4165:       MatDestroy(&pcbddc->coarse_psi_D);
4166:     }
4167:   }

4169:   if (!pcbddc->coarse_phi_B) {
4170:     PetscScalar *marr;

4172:     /* memory size */
4173:     n = n_B*pcbddc->local_primal_size;
4174:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4175:     if (!pcbddc->symmetric_primal) n *= 2;
4176:     PetscCalloc1(n,&marr);
4177:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4178:     marr += n_B*pcbddc->local_primal_size;
4179:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4180:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4181:       marr += n_D*pcbddc->local_primal_size;
4182:     }
4183:     if (!pcbddc->symmetric_primal) {
4184:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4185:       marr += n_B*pcbddc->local_primal_size;
4186:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4187:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4188:       }
4189:     } else {
4190:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4191:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4192:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4193:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4194:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4195:       }
4196:     }
4197:   }

4199:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4200:   p0_lidx_I = NULL;
4201:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4202:     const PetscInt *idxs;

4204:     ISGetIndices(pcis->is_I_local,&idxs);
4205:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4206:     for (i=0;i<pcbddc->benign_n;i++) {
4207:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4208:     }
4209:     ISRestoreIndices(pcis->is_I_local,&idxs);
4210:   }

4212:   /* vertices */
4213:   if (n_vertices) {
4214:     PetscBool restoreavr = PETSC_FALSE;

4216:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4218:     if (n_R) {
4219:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4220:       PetscBLASInt B_N,B_one = 1;
4221:       PetscScalar  *x,*y;

4223:       MatScale(A_RV,m_one);
4224:       if (need_benign_correction) {
4225:         ISLocalToGlobalMapping RtoN;
4226:         IS                     is_p0;
4227:         PetscInt               *idxs_p0,n;

4229:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4230:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4231:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4232:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4233:         ISLocalToGlobalMappingDestroy(&RtoN);
4234:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4235:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4236:         ISDestroy(&is_p0);
4237:       }

4239:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4240:       if (!sparserhs || need_benign_correction) {
4241:         if (lda_rhs == n_R) {
4242:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4243:         } else {
4244:           PetscScalar    *av,*array;
4245:           const PetscInt *xadj,*adjncy;
4246:           PetscInt       n;
4247:           PetscBool      flg_row;

4249:           array = work+lda_rhs*n_vertices;
4250:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4251:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4252:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4253:           MatSeqAIJGetArray(A_RV,&av);
4254:           for (i=0;i<n;i++) {
4255:             PetscInt j;
4256:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4257:           }
4258:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4259:           MatDestroy(&A_RV);
4260:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4261:         }
4262:         if (need_benign_correction) {
4263:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4264:           PetscScalar        *marr;

4266:           MatDenseGetArray(A_RV,&marr);
4267:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4269:                  | 0 0  0 | (V)
4270:              L = | 0 0 -1 | (P-p0)
4271:                  | 0 0 -1 | (p0)

4273:           */
4274:           for (i=0;i<reuse_solver->benign_n;i++) {
4275:             const PetscScalar *vals;
4276:             const PetscInt    *idxs,*idxs_zero;
4277:             PetscInt          n,j,nz;

4279:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4280:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4281:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4282:             for (j=0;j<n;j++) {
4283:               PetscScalar val = vals[j];
4284:               PetscInt    k,col = idxs[j];
4285:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4286:             }
4287:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4288:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4289:           }
4290:           MatDenseRestoreArray(A_RV,&marr);
4291:         }
4292:         PetscObjectReference((PetscObject)A_RV);
4293:         Brhs = A_RV;
4294:       } else {
4295:         Mat tA_RVT,A_RVT;

4297:         if (!pcbddc->symmetric_primal) {
4298:           /* A_RV already scaled by -1 */
4299:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4300:         } else {
4301:           restoreavr = PETSC_TRUE;
4302:           MatScale(A_VR,-1.0);
4303:           PetscObjectReference((PetscObject)A_VR);
4304:           A_RVT = A_VR;
4305:         }
4306:         if (lda_rhs != n_R) {
4307:           PetscScalar *aa;
4308:           PetscInt    r,*ii,*jj;
4309:           PetscBool   done;

4311:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4312:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4313:           MatSeqAIJGetArray(A_RVT,&aa);
4314:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4315:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4316:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4317:         } else {
4318:           PetscObjectReference((PetscObject)A_RVT);
4319:           tA_RVT = A_RVT;
4320:         }
4321:         MatCreateTranspose(tA_RVT,&Brhs);
4322:         MatDestroy(&tA_RVT);
4323:         MatDestroy(&A_RVT);
4324:       }
4325:       if (F) {
4326:         /* need to correct the rhs */
4327:         if (need_benign_correction) {
4328:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4329:           PetscScalar        *marr;

4331:           MatDenseGetArray(Brhs,&marr);
4332:           if (lda_rhs != n_R) {
4333:             for (i=0;i<n_vertices;i++) {
4334:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4335:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4336:               VecResetArray(dummy_vec);
4337:             }
4338:           } else {
4339:             for (i=0;i<n_vertices;i++) {
4340:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4341:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4342:               VecResetArray(pcbddc->vec1_R);
4343:             }
4344:           }
4345:           MatDenseRestoreArray(Brhs,&marr);
4346:         }
4347:         MatMatSolve(F,Brhs,A_RRmA_RV);
4348:         if (restoreavr) {
4349:           MatScale(A_VR,-1.0);
4350:         }
4351:         /* need to correct the solution */
4352:         if (need_benign_correction) {
4353:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4354:           PetscScalar        *marr;

4356:           MatDenseGetArray(A_RRmA_RV,&marr);
4357:           if (lda_rhs != n_R) {
4358:             for (i=0;i<n_vertices;i++) {
4359:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4360:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4361:               VecResetArray(dummy_vec);
4362:             }
4363:           } else {
4364:             for (i=0;i<n_vertices;i++) {
4365:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4366:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4367:               VecResetArray(pcbddc->vec1_R);
4368:             }
4369:           }
4370:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4371:         }
4372:       } else {
4373:         MatDenseGetArray(Brhs,&y);
4374:         for (i=0;i<n_vertices;i++) {
4375:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4376:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4377:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4378:           KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4379:           VecResetArray(pcbddc->vec1_R);
4380:           VecResetArray(pcbddc->vec2_R);
4381:         }
4382:         MatDenseRestoreArray(Brhs,&y);
4383:       }
4384:       MatDestroy(&A_RV);
4385:       MatDestroy(&Brhs);
4386:       /* S_VV and S_CV */
4387:       if (n_constraints) {
4388:         Mat B;

4390:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4391:         for (i=0;i<n_vertices;i++) {
4392:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4393:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4394:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4395:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4396:           VecResetArray(pcis->vec1_B);
4397:           VecResetArray(pcbddc->vec1_R);
4398:         }
4399:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4400:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4401:         MatDestroy(&B);
4402:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4403:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4404:         MatScale(S_CV,m_one);
4405:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4406:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4407:         MatDestroy(&B);
4408:       }
4409:       if (lda_rhs != n_R) {
4410:         MatDestroy(&A_RRmA_RV);
4411:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4412:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4413:       }
4414:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4415:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4416:       if (need_benign_correction) {
4417:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4418:         PetscScalar      *marr,*sums;

4420:         PetscMalloc1(n_vertices,&sums);
4421:         MatDenseGetArray(S_VVt,&marr);
4422:         for (i=0;i<reuse_solver->benign_n;i++) {
4423:           const PetscScalar *vals;
4424:           const PetscInt    *idxs,*idxs_zero;
4425:           PetscInt          n,j,nz;

4427:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4428:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4429:           for (j=0;j<n_vertices;j++) {
4430:             PetscInt k;
4431:             sums[j] = 0.;
4432:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4433:           }
4434:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4435:           for (j=0;j<n;j++) {
4436:             PetscScalar val = vals[j];
4437:             PetscInt k;
4438:             for (k=0;k<n_vertices;k++) {
4439:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4440:             }
4441:           }
4442:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4443:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4444:         }
4445:         PetscFree(sums);
4446:         MatDenseRestoreArray(S_VVt,&marr);
4447:         MatDestroy(&A_RV_bcorr);
4448:       }
4449:       MatDestroy(&A_RRmA_RV);
4450:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4451:       MatDenseGetArray(A_VV,&x);
4452:       MatDenseGetArray(S_VVt,&y);
4453:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4454:       MatDenseRestoreArray(A_VV,&x);
4455:       MatDenseRestoreArray(S_VVt,&y);
4456:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4457:       MatDestroy(&S_VVt);
4458:     } else {
4459:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4460:     }
4461:     MatDestroy(&A_VV);

4463:     /* coarse basis functions */
4464:     for (i=0;i<n_vertices;i++) {
4465:       PetscScalar *y;

4467:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4468:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4469:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4470:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4471:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4472:       y[n_B*i+idx_V_B[i]] = 1.0;
4473:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4474:       VecResetArray(pcis->vec1_B);

4476:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4477:         PetscInt j;

4479:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4480:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4481:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4482:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4483:         VecResetArray(pcis->vec1_D);
4484:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4485:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4486:       }
4487:       VecResetArray(pcbddc->vec1_R);
4488:     }
4489:     /* if n_R == 0 the object is not destroyed */
4490:     MatDestroy(&A_RV);
4491:   }
4492:   VecDestroy(&dummy_vec);

4494:   if (n_constraints) {
4495:     Mat B;

4497:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4498:     MatScale(S_CC,m_one);
4499:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4500:     MatScale(S_CC,m_one);
4501:     if (n_vertices) {
4502:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4503:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4504:       } else {
4505:         Mat S_VCt;

4507:         if (lda_rhs != n_R) {
4508:           MatDestroy(&B);
4509:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4510:           MatSeqDenseSetLDA(B,lda_rhs);
4511:         }
4512:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4513:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4514:         MatDestroy(&S_VCt);
4515:       }
4516:     }
4517:     MatDestroy(&B);
4518:     /* coarse basis functions */
4519:     for (i=0;i<n_constraints;i++) {
4520:       PetscScalar *y;

4522:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4523:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4524:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4525:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4526:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4527:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4528:       VecResetArray(pcis->vec1_B);
4529:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4530:         PetscInt j;

4532:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4533:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4534:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4535:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4536:         VecResetArray(pcis->vec1_D);
4537:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4538:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4539:       }
4540:       VecResetArray(pcbddc->vec1_R);
4541:     }
4542:   }
4543:   if (n_constraints) {
4544:     MatDestroy(&local_auxmat2_R);
4545:   }
4546:   PetscFree(p0_lidx_I);

4548:   /* coarse matrix entries relative to B_0 */
4549:   if (pcbddc->benign_n) {
4550:     Mat         B0_B,B0_BPHI;
4551:     IS          is_dummy;
4552:     PetscScalar *data;
4553:     PetscInt    j;

4555:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4556:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4557:     ISDestroy(&is_dummy);
4558:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4559:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4560:     MatDenseGetArray(B0_BPHI,&data);
4561:     for (j=0;j<pcbddc->benign_n;j++) {
4562:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4563:       for (i=0;i<pcbddc->local_primal_size;i++) {
4564:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4565:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4566:       }
4567:     }
4568:     MatDenseRestoreArray(B0_BPHI,&data);
4569:     MatDestroy(&B0_B);
4570:     MatDestroy(&B0_BPHI);
4571:   }

4573:   /* compute other basis functions for non-symmetric problems */
4574:   if (!pcbddc->symmetric_primal) {
4575:     Mat         B_V=NULL,B_C=NULL;
4576:     PetscScalar *marray;

4578:     if (n_constraints) {
4579:       Mat S_CCT,C_CRT;

4581:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4582:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4583:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4584:       MatDestroy(&S_CCT);
4585:       if (n_vertices) {
4586:         Mat S_VCT;

4588:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4589:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4590:         MatDestroy(&S_VCT);
4591:       }
4592:       MatDestroy(&C_CRT);
4593:     } else {
4594:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4595:     }
4596:     if (n_vertices && n_R) {
4597:       PetscScalar    *av,*marray;
4598:       const PetscInt *xadj,*adjncy;
4599:       PetscInt       n;
4600:       PetscBool      flg_row;

4602:       /* B_V = B_V - A_VR^T */
4603:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4604:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4605:       MatSeqAIJGetArray(A_VR,&av);
4606:       MatDenseGetArray(B_V,&marray);
4607:       for (i=0;i<n;i++) {
4608:         PetscInt j;
4609:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4610:       }
4611:       MatDenseRestoreArray(B_V,&marray);
4612:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4613:       MatDestroy(&A_VR);
4614:     }

4616:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4617:     if (n_vertices) {
4618:       MatDenseGetArray(B_V,&marray);
4619:       for (i=0;i<n_vertices;i++) {
4620:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4621:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4622:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4623:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4624:         VecResetArray(pcbddc->vec1_R);
4625:         VecResetArray(pcbddc->vec2_R);
4626:       }
4627:       MatDenseRestoreArray(B_V,&marray);
4628:     }
4629:     if (B_C) {
4630:       MatDenseGetArray(B_C,&marray);
4631:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4632:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4633:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4634:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4635:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4636:         VecResetArray(pcbddc->vec1_R);
4637:         VecResetArray(pcbddc->vec2_R);
4638:       }
4639:       MatDenseRestoreArray(B_C,&marray);
4640:     }
4641:     /* coarse basis functions */
4642:     for (i=0;i<pcbddc->local_primal_size;i++) {
4643:       PetscScalar *y;

4645:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4646:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4647:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4648:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4649:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4650:       if (i<n_vertices) {
4651:         y[n_B*i+idx_V_B[i]] = 1.0;
4652:       }
4653:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4654:       VecResetArray(pcis->vec1_B);

4656:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4657:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4658:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4659:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4660:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4661:         VecResetArray(pcis->vec1_D);
4662:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4663:       }
4664:       VecResetArray(pcbddc->vec1_R);
4665:     }
4666:     MatDestroy(&B_V);
4667:     MatDestroy(&B_C);
4668:   }

4670:   /* free memory */
4671:   PetscFree(idx_V_B);
4672:   MatDestroy(&S_VV);
4673:   MatDestroy(&S_CV);
4674:   MatDestroy(&S_VC);
4675:   MatDestroy(&S_CC);
4676:   PetscFree(work);
4677:   if (n_vertices) {
4678:     MatDestroy(&A_VR);
4679:   }
4680:   if (n_constraints) {
4681:     MatDestroy(&C_CR);
4682:   }
4683:   /* Checking coarse_sub_mat and coarse basis functions */
4684:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4685:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4686:   if (pcbddc->dbg_flag) {
4687:     Mat         coarse_sub_mat;
4688:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4689:     Mat         coarse_phi_D,coarse_phi_B;
4690:     Mat         coarse_psi_D,coarse_psi_B;
4691:     Mat         A_II,A_BB,A_IB,A_BI;
4692:     Mat         C_B,CPHI;
4693:     IS          is_dummy;
4694:     Vec         mones;
4695:     MatType     checkmattype=MATSEQAIJ;
4696:     PetscReal   real_value;

4698:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4699:       Mat A;
4700:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4701:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4702:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4703:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4704:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4705:       MatDestroy(&A);
4706:     } else {
4707:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4708:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4709:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4710:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4711:     }
4712:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4713:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4714:     if (!pcbddc->symmetric_primal) {
4715:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4716:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4717:     }
4718:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4720:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4721:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4722:     PetscViewerFlush(pcbddc->dbg_viewer);
4723:     if (!pcbddc->symmetric_primal) {
4724:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4725:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4726:       MatDestroy(&AUXMAT);
4727:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4728:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4729:       MatDestroy(&AUXMAT);
4730:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4731:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4732:       MatDestroy(&AUXMAT);
4733:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4734:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4735:       MatDestroy(&AUXMAT);
4736:     } else {
4737:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4738:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4739:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4740:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4741:       MatDestroy(&AUXMAT);
4742:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4743:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4744:       MatDestroy(&AUXMAT);
4745:     }
4746:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4747:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4748:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4749:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4750:     if (pcbddc->benign_n) {
4751:       Mat         B0_B,B0_BPHI;
4752:       PetscScalar *data,*data2;
4753:       PetscInt    j;

4755:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4756:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4757:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4758:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4759:       MatDenseGetArray(TM1,&data);
4760:       MatDenseGetArray(B0_BPHI,&data2);
4761:       for (j=0;j<pcbddc->benign_n;j++) {
4762:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4763:         for (i=0;i<pcbddc->local_primal_size;i++) {
4764:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4765:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4766:         }
4767:       }
4768:       MatDenseRestoreArray(TM1,&data);
4769:       MatDenseRestoreArray(B0_BPHI,&data2);
4770:       MatDestroy(&B0_B);
4771:       ISDestroy(&is_dummy);
4772:       MatDestroy(&B0_BPHI);
4773:     }
4774: #if 0
4775:   {
4776:     PetscViewer viewer;
4777:     char filename[256];
4778:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4779:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4780:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4781:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4782:     MatView(coarse_sub_mat,viewer);
4783:     PetscObjectSetName((PetscObject)TM1,"projected");
4784:     MatView(TM1,viewer);
4785:     if (pcbddc->coarse_phi_B) {
4786:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4787:       MatView(pcbddc->coarse_phi_B,viewer);
4788:     }
4789:     if (pcbddc->coarse_phi_D) {
4790:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4791:       MatView(pcbddc->coarse_phi_D,viewer);
4792:     }
4793:     if (pcbddc->coarse_psi_B) {
4794:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4795:       MatView(pcbddc->coarse_psi_B,viewer);
4796:     }
4797:     if (pcbddc->coarse_psi_D) {
4798:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4799:       MatView(pcbddc->coarse_psi_D,viewer);
4800:     }
4801:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4802:     MatView(pcbddc->local_mat,viewer);
4803:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4804:     MatView(pcbddc->ConstraintMatrix,viewer);
4805:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4806:     ISView(pcis->is_I_local,viewer);
4807:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4808:     ISView(pcis->is_B_local,viewer);
4809:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4810:     ISView(pcbddc->is_R_local,viewer);
4811:     PetscViewerDestroy(&viewer);
4812:   }
4813: #endif
4814:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4815:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4816:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4817:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4819:     /* check constraints */
4820:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4821:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4822:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4823:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4824:     } else {
4825:       PetscScalar *data;
4826:       Mat         tmat;
4827:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4828:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4829:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4830:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4831:       MatDestroy(&tmat);
4832:     }
4833:     MatCreateVecs(CPHI,&mones,NULL);
4834:     VecSet(mones,-1.0);
4835:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4836:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4837:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4838:     if (!pcbddc->symmetric_primal) {
4839:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4840:       VecSet(mones,-1.0);
4841:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4842:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4843:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4844:     }
4845:     MatDestroy(&C_B);
4846:     MatDestroy(&CPHI);
4847:     ISDestroy(&is_dummy);
4848:     VecDestroy(&mones);
4849:     PetscViewerFlush(pcbddc->dbg_viewer);
4850:     MatDestroy(&A_II);
4851:     MatDestroy(&A_BB);
4852:     MatDestroy(&A_IB);
4853:     MatDestroy(&A_BI);
4854:     MatDestroy(&TM1);
4855:     MatDestroy(&TM2);
4856:     MatDestroy(&TM3);
4857:     MatDestroy(&TM4);
4858:     MatDestroy(&coarse_phi_D);
4859:     MatDestroy(&coarse_phi_B);
4860:     if (!pcbddc->symmetric_primal) {
4861:       MatDestroy(&coarse_psi_D);
4862:       MatDestroy(&coarse_psi_B);
4863:     }
4864:     MatDestroy(&coarse_sub_mat);
4865:   }
4866:   /* get back data */
4867:   *coarse_submat_vals_n = coarse_submat_vals;
4868:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4869:   return(0);
4870: }

4872: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4873: {
4874:   Mat            *work_mat;
4875:   IS             isrow_s,iscol_s;
4876:   PetscBool      rsorted,csorted;
4877:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4881:   ISSorted(isrow,&rsorted);
4882:   ISSorted(iscol,&csorted);
4883:   ISGetLocalSize(isrow,&rsize);
4884:   ISGetLocalSize(iscol,&csize);

4886:   if (!rsorted) {
4887:     const PetscInt *idxs;
4888:     PetscInt *idxs_sorted,i;

4890:     PetscMalloc1(rsize,&idxs_perm_r);
4891:     PetscMalloc1(rsize,&idxs_sorted);
4892:     for (i=0;i<rsize;i++) {
4893:       idxs_perm_r[i] = i;
4894:     }
4895:     ISGetIndices(isrow,&idxs);
4896:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4897:     for (i=0;i<rsize;i++) {
4898:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4899:     }
4900:     ISRestoreIndices(isrow,&idxs);
4901:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4902:   } else {
4903:     PetscObjectReference((PetscObject)isrow);
4904:     isrow_s = isrow;
4905:   }

4907:   if (!csorted) {
4908:     if (isrow == iscol) {
4909:       PetscObjectReference((PetscObject)isrow_s);
4910:       iscol_s = isrow_s;
4911:     } else {
4912:       const PetscInt *idxs;
4913:       PetscInt       *idxs_sorted,i;

4915:       PetscMalloc1(csize,&idxs_perm_c);
4916:       PetscMalloc1(csize,&idxs_sorted);
4917:       for (i=0;i<csize;i++) {
4918:         idxs_perm_c[i] = i;
4919:       }
4920:       ISGetIndices(iscol,&idxs);
4921:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4922:       for (i=0;i<csize;i++) {
4923:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4924:       }
4925:       ISRestoreIndices(iscol,&idxs);
4926:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4927:     }
4928:   } else {
4929:     PetscObjectReference((PetscObject)iscol);
4930:     iscol_s = iscol;
4931:   }

4933:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

4935:   if (!rsorted || !csorted) {
4936:     Mat      new_mat;
4937:     IS       is_perm_r,is_perm_c;

4939:     if (!rsorted) {
4940:       PetscInt *idxs_r,i;
4941:       PetscMalloc1(rsize,&idxs_r);
4942:       for (i=0;i<rsize;i++) {
4943:         idxs_r[idxs_perm_r[i]] = i;
4944:       }
4945:       PetscFree(idxs_perm_r);
4946:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4947:     } else {
4948:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4949:     }
4950:     ISSetPermutation(is_perm_r);

4952:     if (!csorted) {
4953:       if (isrow_s == iscol_s) {
4954:         PetscObjectReference((PetscObject)is_perm_r);
4955:         is_perm_c = is_perm_r;
4956:       } else {
4957:         PetscInt *idxs_c,i;
4958:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4959:         PetscMalloc1(csize,&idxs_c);
4960:         for (i=0;i<csize;i++) {
4961:           idxs_c[idxs_perm_c[i]] = i;
4962:         }
4963:         PetscFree(idxs_perm_c);
4964:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4965:       }
4966:     } else {
4967:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4968:     }
4969:     ISSetPermutation(is_perm_c);

4971:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4972:     MatDestroy(&work_mat[0]);
4973:     work_mat[0] = new_mat;
4974:     ISDestroy(&is_perm_r);
4975:     ISDestroy(&is_perm_c);
4976:   }

4978:   PetscObjectReference((PetscObject)work_mat[0]);
4979:   *B = work_mat[0];
4980:   MatDestroyMatrices(1,&work_mat);
4981:   ISDestroy(&isrow_s);
4982:   ISDestroy(&iscol_s);
4983:   return(0);
4984: }

4986: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4987: {
4988:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4989:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4990:   Mat            new_mat,lA;
4991:   IS             is_local,is_global;
4992:   PetscInt       local_size;
4993:   PetscBool      isseqaij;

4997:   MatDestroy(&pcbddc->local_mat);
4998:   MatGetSize(matis->A,&local_size,NULL);
4999:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5000:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5001:   ISDestroy(&is_local);
5002:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5003:   ISDestroy(&is_global);

5005:   /* check */
5006:   if (pcbddc->dbg_flag) {
5007:     Vec       x,x_change;
5008:     PetscReal error;

5010:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5011:     VecSetRandom(x,NULL);
5012:     MatMult(ChangeOfBasisMatrix,x,x_change);
5013:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5014:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5015:     MatMult(new_mat,matis->x,matis->y);
5016:     if (!pcbddc->change_interior) {
5017:       const PetscScalar *x,*y,*v;
5018:       PetscReal         lerror = 0.;
5019:       PetscInt          i;

5021:       VecGetArrayRead(matis->x,&x);
5022:       VecGetArrayRead(matis->y,&y);
5023:       VecGetArrayRead(matis->counter,&v);
5024:       for (i=0;i<local_size;i++)
5025:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5026:           lerror = PetscAbsScalar(x[i]-y[i]);
5027:       VecRestoreArrayRead(matis->x,&x);
5028:       VecRestoreArrayRead(matis->y,&y);
5029:       VecRestoreArrayRead(matis->counter,&v);
5030:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5031:       if (error > PETSC_SMALL) {
5032:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5033:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5034:         } else {
5035:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5036:         }
5037:       }
5038:     }
5039:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5040:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5041:     VecAXPY(x,-1.0,x_change);
5042:     VecNorm(x,NORM_INFINITY,&error);
5043:     if (error > PETSC_SMALL) {
5044:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5045:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5046:       } else {
5047:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5048:       }
5049:     }
5050:     VecDestroy(&x);
5051:     VecDestroy(&x_change);
5052:   }

5054:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5055:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

5057:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5058:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5059:   if (isseqaij) {
5060:     MatDestroy(&pcbddc->local_mat);
5061:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5062:     if (lA) {
5063:       Mat work;
5064:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5065:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5066:       MatDestroy(&work);
5067:     }
5068:   } else {
5069:     Mat work_mat;

5071:     MatDestroy(&pcbddc->local_mat);
5072:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5073:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5074:     MatDestroy(&work_mat);
5075:     if (lA) {
5076:       Mat work;
5077:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5078:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5079:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5080:       MatDestroy(&work);
5081:     }
5082:   }
5083:   if (matis->A->symmetric_set) {
5084:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5085: #if !defined(PETSC_USE_COMPLEX)
5086:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5087: #endif
5088:   }
5089:   MatDestroy(&new_mat);
5090:   return(0);
5091: }

5093: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5094: {
5095:   PC_IS*          pcis = (PC_IS*)(pc->data);
5096:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5097:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5098:   PetscInt        *idx_R_local=NULL;
5099:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5100:   PetscInt        vbs,bs;
5101:   PetscBT         bitmask=NULL;
5102:   PetscErrorCode  ierr;

5105:   /*
5106:     No need to setup local scatters if
5107:       - primal space is unchanged
5108:         AND
5109:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5110:         AND
5111:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5112:   */
5113:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5114:     return(0);
5115:   }
5116:   /* destroy old objects */
5117:   ISDestroy(&pcbddc->is_R_local);
5118:   VecScatterDestroy(&pcbddc->R_to_B);
5119:   VecScatterDestroy(&pcbddc->R_to_D);
5120:   /* Set Non-overlapping dimensions */
5121:   n_B = pcis->n_B;
5122:   n_D = pcis->n - n_B;
5123:   n_vertices = pcbddc->n_vertices;

5125:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

5127:   /* create auxiliary bitmask and allocate workspace */
5128:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5129:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5130:     PetscBTCreate(pcis->n,&bitmask);
5131:     for (i=0;i<n_vertices;i++) {
5132:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5133:     }

5135:     for (i=0, n_R=0; i<pcis->n; i++) {
5136:       if (!PetscBTLookup(bitmask,i)) {
5137:         idx_R_local[n_R++] = i;
5138:       }
5139:     }
5140:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5141:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5143:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5144:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5145:   }

5147:   /* Block code */
5148:   vbs = 1;
5149:   MatGetBlockSize(pcbddc->local_mat,&bs);
5150:   if (bs>1 && !(n_vertices%bs)) {
5151:     PetscBool is_blocked = PETSC_TRUE;
5152:     PetscInt  *vary;
5153:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5154:       PetscMalloc1(pcis->n/bs,&vary);
5155:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5156:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5157:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5158:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5159:       for (i=0; i<pcis->n/bs; i++) {
5160:         if (vary[i]!=0 && vary[i]!=bs) {
5161:           is_blocked = PETSC_FALSE;
5162:           break;
5163:         }
5164:       }
5165:       PetscFree(vary);
5166:     } else {
5167:       /* Verify directly the R set */
5168:       for (i=0; i<n_R/bs; i++) {
5169:         PetscInt j,node=idx_R_local[bs*i];
5170:         for (j=1; j<bs; j++) {
5171:           if (node != idx_R_local[bs*i+j]-j) {
5172:             is_blocked = PETSC_FALSE;
5173:             break;
5174:           }
5175:         }
5176:       }
5177:     }
5178:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5179:       vbs = bs;
5180:       for (i=0;i<n_R/vbs;i++) {
5181:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5182:       }
5183:     }
5184:   }
5185:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5186:   if (sub_schurs && sub_schurs->reuse_solver) {
5187:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5189:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5190:     ISDestroy(&reuse_solver->is_R);
5191:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5192:     reuse_solver->is_R = pcbddc->is_R_local;
5193:   } else {
5194:     PetscFree(idx_R_local);
5195:   }

5197:   /* print some info if requested */
5198:   if (pcbddc->dbg_flag) {
5199:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5200:     PetscViewerFlush(pcbddc->dbg_viewer);
5201:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5202:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5203:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5204:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5205:     PetscViewerFlush(pcbddc->dbg_viewer);
5206:   }

5208:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5209:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5210:     IS       is_aux1,is_aux2;
5211:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5213:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5214:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5215:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5216:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5217:     for (i=0; i<n_D; i++) {
5218:       PetscBTSet(bitmask,is_indices[i]);
5219:     }
5220:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5221:     for (i=0, j=0; i<n_R; i++) {
5222:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5223:         aux_array1[j++] = i;
5224:       }
5225:     }
5226:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5227:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5228:     for (i=0, j=0; i<n_B; i++) {
5229:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5230:         aux_array2[j++] = i;
5231:       }
5232:     }
5233:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5234:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5235:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5236:     ISDestroy(&is_aux1);
5237:     ISDestroy(&is_aux2);

5239:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5240:       PetscMalloc1(n_D,&aux_array1);
5241:       for (i=0, j=0; i<n_R; i++) {
5242:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5243:           aux_array1[j++] = i;
5244:         }
5245:       }
5246:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5247:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5248:       ISDestroy(&is_aux1);
5249:     }
5250:     PetscBTDestroy(&bitmask);
5251:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5252:   } else {
5253:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5254:     IS                 tis;
5255:     PetscInt           schur_size;

5257:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5258:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5259:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5260:     ISDestroy(&tis);
5261:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5262:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5263:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5264:       ISDestroy(&tis);
5265:     }
5266:   }
5267:   return(0);
5268: }

/*
   MatNullSpacePropagate_Private - Attaches to B a near null space built from the
   null space (or, when that is missing, the near null space) attached to A.

   Input Parameters:
+  A  - matrix queried with MatGetNullSpace() and, as a fallback, MatGetNearNullSpace()
.  is - index set used as the source indices of the scatter from a vector of A to a vector of B
-  B  - matrix that receives the new near null space through MatSetNearNullSpace()

   Notes:
   The routine returns immediately when B already has a null space or a near null space,
   or when A has neither.
   The new vectors are created on top of the columns of a dense matrix, which is composed
   with the new MatNullSpace object under the name "_PBDDC_Null_dmat".
*/
5270: static PetscErrorCode MatNullSpacePropagate_Private(Mat A, IS is, Mat B)
5271: {
5272:   MatNullSpace   NullSpace;
5273:   Mat            dmat;
5274:   const Vec      *nullvecs;
5275:   Vec            v,v2,*nullvecs2;
5276:   VecScatter     sct;
5277:   PetscInt       k,nnsp_size,bsiz,n,N,bs;
5278:   PetscBool      nnsp_has_cnst;

        /* nothing to do if B already carries a null space or a near null space */
5282:   MatGetNullSpace(B,&NullSpace);
5283:   if (!NullSpace) {
5284:     MatGetNearNullSpace(B,&NullSpace);
5285:   }
5286:   if (NullSpace) return(0);
        /* source space: null space of A, falling back to its near null space */
5287:   MatGetNullSpace(A,&NullSpace);
5288:   if (!NullSpace) {
5289:     MatGetNearNullSpace(A,&NullSpace);
5290:   }
5291:   if (!NullSpace) return(0);
        /* work vectors compatible with A and B, and the scatter taking the entries of v listed in is into v2 */
5292:   MatCreateVecs(A,&v,NULL);
5293:   MatCreateVecs(B,&v2,NULL);
5294:   VecScatterCreate(v,is,v2,NULL,&sct);
5295:   MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
        /* one extra vector is needed when the source space contains the constant vector */
5296:   bsiz = nnsp_size+!!nnsp_has_cnst;
5297:   PetscMalloc1(bsiz,&nullvecs2);
5298:   VecGetBlockSize(v2,&bs);
5299:   VecGetSize(v2,&N);
5300:   VecGetLocalSize(v2,&n);
        /* dense matrix with the layout of v2 and bsiz columns: each column is the storage of one new vector */
5301:   MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz,NULL,&dmat);
5302:   for (k=0;k<nnsp_size;k++) {
5303:     PetscScalar *arr;

          /* wrap column k in a Vec and fill it by scattering the k-th vector of the source space */
5305:     MatDenseGetColumn(dmat,k,&arr);
5306:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[k]);
5307:     VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5308:     VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5309:     MatDenseRestoreColumn(dmat,&arr);
5310:   }
5311:   if (nnsp_has_cnst) {
5312:     PetscScalar *arr;

          /* the constant vector is stored explicitly in the last column */
5314:     MatDenseGetColumn(dmat,nnsp_size,&arr);
5315:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[nnsp_size]);
5316:     VecSet(nullvecs2[nnsp_size],1.0);
5317:     MatDenseRestoreColumn(dmat,&arr);
5318:   }
5319:   PCBDDCOrthonormalizeVecs(bsiz,nullvecs2);
        /* has_cnst is PETSC_FALSE here since the constant, if any, is already among the vectors */
5320:   MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz,nullvecs2,&NullSpace);
        /* the vectors use the column arrays of dmat: composing it with the null space presumably keeps
           that storage alive after the MatDestroy() below - NOTE(review): confirm */
5321:   PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5322:   MatDestroy(&dmat);
5323:   for (k=0;k<bsiz;k++) {
5324:     VecDestroy(&nullvecs2[k]);
5325:   }
5326:   PetscFree(nullvecs2);
5327:   MatSetNearNullSpace(B,NullSpace);
5328:   MatNullSpaceDestroy(&NullSpace);
5329:   VecDestroy(&v);
5330:   VecDestroy(&v2);
5331:   VecScatterDestroy(&sct);
5332:   return(0);
5333: }

/*
   PCBDDCSetUpLocalSolvers - Sets up the local Dirichlet solver (pcbddc->ksp_D, operators
   pcis->A_II and pcis->pA_II) and/or the local Neumann solver (pcbddc->ksp_R, operator A_RR).

   Input Parameters:
+  pc        - the preconditioner; pc->data is accessed both as PC_BDDC and as PC_IS
.  dirichlet - if true, (re)configure pcbddc->ksp_D and store a reference to it in pcis->ksp_D
-  neumann   - if true, create or reuse the matrix A_RR and (re)configure pcbddc->ksp_R

   Notes:
   A newly created solver defaults to KSPPREONLY with PCCHOLESKY when the matrix is MATSEQSBAIJ
   and PCLU otherwise; KSPSetFromOptions() is called only when the KSP has just been created.
   The options prefixes of the two solvers are derived from the prefix of pc.
   When pcbddc->dbg_flag is set, each requested solver is tested on a random vector and the
   infinity norm of the error is printed on pcbddc->dbg_viewer.
*/
5335: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5336: {
5337:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5338:   PC_IS          *pcis = (PC_IS*)pc->data;
5339:   PC             pc_temp;
5340:   Mat            A_RR;
5341:   MatNullSpace   nnsp;
5342:   MatReuse       reuse;
5343:   PetscScalar    m_one = -1.0;
5344:   PetscReal      value;
5345:   PetscInt       n_D,n_R;
5346:   PetscBool      issbaij,opts;
5348:   void           (*f)(void) = 0;
5349:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5350:   size_t         len;

5353:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5354:   /* compute prefixes */
        /* level 0:  <pc prefix>pc_bddc_dirichlet_ and <pc prefix>pc_bddc_neumann_
           level >0: the trailing "pc_bddc_coarse_" (and "lX_" when present) is cut from the pc prefix,
                     then "pc_bddc_dirichlet_"/"pc_bddc_neumann_" and "l<current_level>_" are appended */
5355:   PetscStrcpy(dir_prefix,"");
5356:   PetscStrcpy(neu_prefix,"");
5357:   if (!pcbddc->current_level) {
5358:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5359:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5360:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5361:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5362:   } else {
5363:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5364:     PetscStrlen(((PetscObject)pc)->prefix,&len);
5365:     len -= 15; /* remove "pc_bddc_coarse_" */
5366:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5367:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5368:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5369:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5370:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5371:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5372:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5373:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5374:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5375:   }

5377:   /* DIRICHLET PROBLEM */
5378:   if (dirichlet) {
5379:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
          /* benign case without explicit change of basis: only supported when the sub_schurs solver is reused;
             in debug mode A_II is replaced by the matrix returned by PCBDDCBenignProject() */
5380:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5381:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5382:       if (pcbddc->dbg_flag) {
5383:         Mat    A_IIn;

5385:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5386:         MatDestroy(&pcis->A_II);
5387:         pcis->A_II = A_IIn;
5388:       }
5389:     }
          /* propagate the symmetry flag of the local matrix, when it has been set */
5390:     if (pcbddc->local_mat->symmetric_set) {
5391:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5392:     }
5393:     /* Matrix for Dirichlet problem is pcis->A_II */
5394:     n_D  = pcis->n - pcis->n_B;
5395:     opts = PETSC_FALSE;
5396:     if (!pcbddc->ksp_D) { /* create object if not yet built */
5397:       opts = PETSC_TRUE;
5398:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5399:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5400:       /* default */
5401:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5402:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5403:       PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5404:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5405:       if (issbaij) {
5406:         PCSetType(pc_temp,PCCHOLESKY);
5407:       } else {
5408:         PCSetType(pc_temp,PCLU);
5409:       }
5410:       KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5411:     }
5412:     MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5413:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5414:     /* Allow user's customization */
5415:     if (opts) {
5416:       KSPSetFromOptions(pcbddc->ksp_D);
5417:     }
5418:     if (pcbddc->NullSpace_corr[0]) { /* approximate solver, propagate NearNullSpace */
5419:       MatNullSpacePropagate_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5420:     }
          /* if the PC composes "PCSetCoordinates_C", the graph has coordinates (cloc) and no near null space
             is attached to pA_II, pass to the PC the coordinates of the dofs listed in is_I_local */
5421:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5422:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
5423:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5424:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5425:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5426:       const PetscInt *idxs;
5427:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5429:       ISGetLocalSize(pcis->is_I_local,&nl);
5430:       ISGetIndices(pcis->is_I_local,&idxs);
5431:       PetscMalloc1(nl*cdim,&scoords);
5432:       for (i=0;i<nl;i++) {
5433:         for (d=0;d<cdim;d++) {
5434:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5435:         }
5436:       }
5437:       ISRestoreIndices(pcis->is_I_local,&idxs);
5438:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5439:       PetscFree(scoords);
5440:     }
          /* when the sub_schurs solver is reused, its interior solver replaces the PC of ksp_D */
5441:     if (sub_schurs && sub_schurs->reuse_solver) {
5442:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5444:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5445:     }

5447:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5448:     if (!n_D) {
5449:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5450:       PCSetType(pc_temp,PCNONE);
5451:     }
5452:     /* set ksp_D into pcis data */
          /* the reference is taken before destroying the old pcis->ksp_D, so the sequence is safe
             even when both pointers already refer to the same object */
5453:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5454:     KSPDestroy(&pcis->ksp_D);
5455:     pcis->ksp_D = pcbddc->ksp_D;
5456:   }

5458:   /* NEUMANN PROBLEM */
5459:   A_RR = 0;
5460:   if (neumann) {
5461:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5462:     PetscInt        ibs,mbs;
          /* note: this issbaij shadows the variable with the same name declared at function scope */
5463:     PetscBool       issbaij, reuse_neumann_solver;
5464:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

          /* the correction solver of sub_schurs is reused only if it exists and no object named
             "__KSPFETIDP_iP" is composed with sub_schurs->A */
5466:     reuse_neumann_solver = PETSC_FALSE;
5467:     if (sub_schurs && sub_schurs->reuse_solver) {
5468:       IS iP;

5470:       reuse_neumann_solver = PETSC_TRUE;
5471:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5472:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5473:     }
5474:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5475:     ISGetSize(pcbddc->is_R_local,&n_R);
5476:     if (pcbddc->ksp_R) { /* already created ksp */
5477:       PetscInt nn_R;
            /* take a reference on the current operator: A_RR is destroyed either below or at the end of the routine */
5478:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5479:       PetscObjectReference((PetscObject)A_RR);
5480:       MatGetSize(A_RR,&nn_R,NULL);
5481:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5482:         KSPReset(pcbddc->ksp_R);
5483:         MatDestroy(&A_RR);
5484:         reuse = MAT_INITIAL_MATRIX;
5485:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5486:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5487:           MatDestroy(&A_RR);
5488:           reuse = MAT_INITIAL_MATRIX;
5489:         } else { /* safe to reuse the matrix */
5490:           reuse = MAT_REUSE_MATRIX;
5491:         }
5492:       }
5493:       /* last check */
5494:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5495:         MatDestroy(&A_RR);
5496:         reuse = MAT_INITIAL_MATRIX;
5497:       }
5498:     } else { /* first time, so we need to create the matrix */
5499:       reuse = MAT_INITIAL_MATRIX;
5500:     }
5501:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
          /* when local_mat is the same object as matis->A a new converted matrix is created instead of
             converting in place (presumably to leave matis->A untouched - NOTE(review): confirm) */
5502:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5503:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5504:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5505:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5506:       if (matis->A == pcbddc->local_mat) {
5507:         MatDestroy(&pcbddc->local_mat);
5508:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5509:       } else {
5510:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5511:       }
5512:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5513:       if (matis->A == pcbddc->local_mat) {
5514:         MatDestroy(&pcbddc->local_mat);
5515:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5516:       } else {
5517:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5518:       }
5519:     }
5520:     /* extract A_RR */
5521:     if (reuse_neumann_solver) {
5522:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5524:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5525:         MatDestroy(&A_RR);
5526:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5527:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5528:         } else {
5529:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5530:         }
5531:       } else {
              /* no test requested: use the operator already attached to the reused correction solver */
5532:         MatDestroy(&A_RR);
5533:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5534:         PetscObjectReference((PetscObject)A_RR);
5535:       }
5536:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5537:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5538:     }
5539:     if (pcbddc->local_mat->symmetric_set) {
5540:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5541:     }
5542:     opts = PETSC_FALSE;
5543:     if (!pcbddc->ksp_R) { /* create object if not present */
5544:       opts = PETSC_TRUE;
5545:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5546:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5547:       /* default */
5548:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5549:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5550:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5551:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5552:       if (issbaij) {
5553:         PCSetType(pc_temp,PCCHOLESKY);
5554:       } else {
5555:         PCSetType(pc_temp,PCLU);
5556:       }
5557:       KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5558:     }
5559:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5560:     MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5561:     if (opts) { /* Allow user's customization once */
5562:       KSPSetFromOptions(pcbddc->ksp_R);
5563:     }
5564:     if (pcbddc->NullSpace_corr[2]) { /* approximate solver, propagate NearNullSpace */
5565:       MatNullSpacePropagate_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5566:     }
          /* same coordinate handling as for the Dirichlet solver, using the dofs listed in is_R_local */
5567:     MatGetNearNullSpace(A_RR,&nnsp);
5568:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
5569:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5570:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5571:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5572:       const PetscInt *idxs;
5573:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5575:       ISGetLocalSize(pcbddc->is_R_local,&nl);
5576:       ISGetIndices(pcbddc->is_R_local,&idxs);
5577:       PetscMalloc1(nl*cdim,&scoords);
5578:       for (i=0;i<nl;i++) {
5579:         for (d=0;d<cdim;d++) {
5580:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5581:         }
5582:       }
5583:       ISRestoreIndices(pcbddc->is_R_local,&idxs);
5584:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5585:       PetscFree(scoords);
5586:     }

5588:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5589:     if (!n_R) {
5590:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5591:       PCSetType(pc_temp,PCNONE);
5592:     }
5593:     /* Reuse solver if it is present */
5594:     if (reuse_neumann_solver) {
5595:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5597:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5598:     }
5599:   }

        /* NOTE(review): PetscViewerASCIIPushSynchronized() below has no matching
           PetscViewerASCIIPopSynchronized() inside this routine - confirm it is popped elsewhere */
5601:   if (pcbddc->dbg_flag) {
5602:     PetscViewerFlush(pcbddc->dbg_viewer);
5603:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5604:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5605:   }

5607:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5608:   if (pcbddc->NullSpace_corr[0]) {
5609:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5610:   }
5611:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5612:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5613:   }
5614:   if (neumann && pcbddc->NullSpace_corr[2]) {
5615:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5616:   }
5617:   /* check Dirichlet and Neumann solvers */
        /* test: x random, y = A x, solve A z = y in place, print ||x - z||_inf */
5618:   if (pcbddc->dbg_flag) {
5619:     if (dirichlet) { /* Dirichlet */
5620:       VecSetRandom(pcis->vec1_D,NULL);
5621:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5622:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5623:       KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5624:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5625:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5626:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5627:       PetscViewerFlush(pcbddc->dbg_viewer);
5628:     }
5629:     if (neumann) { /* Neumann */
5630:       VecSetRandom(pcbddc->vec1_R,NULL);
5631:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5632:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5633:       KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5634:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5635:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5636:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5637:       PetscViewerFlush(pcbddc->dbg_viewer);
5638:     }
5639:   }
5640:   /* free Neumann problem's matrix */
5641:   MatDestroy(&A_RR);
5642:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5643:   return(0);
5644: }

/*
   PCBDDCSolveSubstructureCorrection - Applies, in place, the local solve on the R dofs to the
   data stored in inout_B (and in inout_D when pcbddc->switch_static is set).

   Input Parameters:
+  pc             - the preconditioner (pc->data is a PC_BDDC)
.  inout_B        - on entry the right-hand side on the B dofs, on exit the corresponding part of the solution
.  inout_D        - as inout_B for the D dofs; only accessed when pcbddc->switch_static is set
-  applytranspose - if true, the transposed solve is used and the local_auxmat1/local_auxmat2
                    correction is applied (transposed) before the solve instead of after it

   Notes:
   The solve is done with pcbddc->ksp_R on pcbddc->vec1_R, unless the sub_schurs solver is reused
   and switch_static is not set: in that case MatFactorSolveSchurComplement[Transpose]() is called
   on the reused factorization with the rhs_B/sol_B work vectors.
*/
5646: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5647: {
5648:   PetscErrorCode  ierr;
5649:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5650:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* note: this boolean is shadowed in some inner scopes by a PCBDDCReuseSolvers variable with the same name */
5651:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* entries of vec1_R not filled by the scatters below must be zero */
5654:   if (!reuse_solver) {
5655:     VecSet(pcbddc->vec1_R,0.);
5656:   }
        /* load the right-hand side */
5657:   if (!pcbddc->switch_static) {
5658:     if (applytranspose && pcbddc->local_auxmat1) {
5659:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5660:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5661:     }
5662:     if (!reuse_solver) {
5663:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5664:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5665:     } else {
5666:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5668:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5669:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5670:     }
5671:   } else {
          /* switch_static: both the B and the D parts are gathered into vec1_R */
5672:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5673:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5674:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5675:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5676:     if (applytranspose && pcbddc->local_auxmat1) {
5677:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5678:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5679:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5680:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5681:     }
5682:   }
        /* solve */
5683:   if (!reuse_solver || pcbddc->switch_static) {
5684:     if (applytranspose) {
5685:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5686:     } else {
5687:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5688:     }
5689:     KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5690:   } else {
5691:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5693:     if (applytranspose) {
5694:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5695:     } else {
5696:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5697:     }
5698:   }
        /* unload the solution */
5699:   VecSet(inout_B,0.);
5700:   if (!pcbddc->switch_static) {
5701:     if (!reuse_solver) {
5702:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5703:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5704:     } else {
5705:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5707:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5708:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5709:     }
5710:     if (!applytranspose && pcbddc->local_auxmat1) {
5711:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5712:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5713:     }
5714:   } else {
5715:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5716:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5717:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5718:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5719:     if (!applytranspose && pcbddc->local_auxmat1) {
5720:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5721:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5722:     }
          /* scatter again: vec1_R may have been updated by the correction above */
5723:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5724:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5725:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5726:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5727:   }
5728:   return(0);
5729: }

5731: /* PCBDDCApplyInterfacePreconditioner - Applies the coarse and the local corrections, in place, to
         pcis->vec1_B (and to pcis->vec1_D when pcbddc->switch_static is set).

         The parameter applytranspose determines if the interface preconditioner should be applied
         transposed or not: it swaps the roles of coarse_phi and coarse_psi and selects
         KSPSolveTranspose() for the coarse and local solves.

         When pcbddc->benign_apply_coarse_only is set, the restriction of the input and the local
         correction are skipped, and vec1_B is overwritten with the expanded coarse solution. */
5732: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5733: {
5735:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5736:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5737:   const PetscScalar zero = 0.0;

5740:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5741:   if (!pcbddc->benign_apply_coarse_only) {
5742:     if (applytranspose) {
5743:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5744:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5745:     } else {
5746:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5747:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5748:     }
5749:   } else {
5750:     VecSet(pcbddc->vec1_P,zero);
5751:   }

5753:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5754:   if (pcbddc->benign_n) {
5755:     PetscScalar *array;
5756:     PetscInt    j;

5758:     VecGetArray(pcbddc->vec1_P,&array);
5759:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5760:     VecRestoreArray(pcbddc->vec1_P,&array);
5761:   }

5763:   /* start communications from local primal nodes to rhs of coarse solver */
5764:   VecSet(pcbddc->coarse_vec,zero);
5765:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5766:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5768:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5769:   if (pcbddc->coarse_ksp) {
5770:     Mat          coarse_mat;
5771:     Vec          rhs,sol;
5772:     MatNullSpace nullsp;
5773:     PetscBool    isbddc = PETSC_FALSE;

5775:     if (pcbddc->benign_have_null) {
5776:       PC        coarse_pc;

5778:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5779:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5780:       /* we need to propagate to coarser levels the need for a possible benign correction */
5781:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5782:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5783:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5784:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5785:       }
5786:     }
5787:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5788:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5789:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5790:     if (applytranspose) {
5791:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5792:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5793:       KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
            /* remove from the solution the transpose null space attached to the coarse matrix, if any */
5794:       MatGetTransposeNullSpace(coarse_mat,&nullsp);
5795:       if (nullsp) {
5796:         MatNullSpaceRemove(nullsp,sol);
5797:       }
5798:     } else {
5799:       MatGetNullSpace(coarse_mat,&nullsp);
5800:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5801:         PC        coarse_pc;

              /* the coarse KSP is not solved here: PCBDDCBenignRemoveInterior() is called on the coarse PC
                 between PCPreSolve() and PCPostSolve() */
5803:         if (nullsp) {
5804:           MatNullSpaceRemove(nullsp,rhs);
5805:         }
5806:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5807:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5808:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5809:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5810:       } else {
5811:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5812:         KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5813:         if (nullsp) {
5814:           MatNullSpaceRemove(nullsp,sol);
5815:         }
5816:       }
5817:     }
5818:     /* we don't need the benign correction at coarser levels anymore */
5819:     if (pcbddc->benign_have_null && isbddc) {
5820:       PC        coarse_pc;
5821:       PC_BDDC*  coarsepcbddc;

5823:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5824:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5825:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5826:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5827:     }
5828:   }

5830:   /* Local solution on R nodes */
5831:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5832:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5833:   }
5834:   /* communications from coarse sol to local primal nodes */
5835:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5836:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5838:   /* Sum contributions from the two levels */
5839:   if (!pcbddc->benign_apply_coarse_only) {
5840:     if (applytranspose) {
5841:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5842:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5843:     } else {
5844:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5845:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5846:     }
5847:     /* store p0 */
          /* copy back the last benign_n entries of vec1_P into benign_p0 */
5848:     if (pcbddc->benign_n) {
5849:       PetscScalar *array;
5850:       PetscInt    j;

5852:       VecGetArray(pcbddc->vec1_P,&array);
5853:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5854:       VecRestoreArray(pcbddc->vec1_P,&array);
5855:     }
5856:   } else { /* expand the coarse solution */
5857:     if (applytranspose) {
5858:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5859:     } else {
5860:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5861:     }
5862:   }
5863:   return(0);
5864: }

/*
   PCBDDCScatterCoarseDataBegin - Begins the scatter between pcbddc->vec1_P and pcbddc->coarse_vec
   using pcbddc->coarse_loc_to_glob.

   Input Parameters:
+  pc    - the preconditioner (pc->data is a PC_BDDC)
.  imode - insert mode passed to VecScatterBegin()
-  smode - SCATTER_FORWARD scatters vec1_P into coarse_vec; SCATTER_REVERSE scatters coarse_vec into vec1_P

   Notes:
   In reverse mode, when pcbddc->coarse_ksp exists, the array placed on the right-hand side of the
   coarse KSP is reset and the array of the coarse solution is placed in coarse_vec, so that the
   scatter reads the coarse solution. That placement is undone by the VecResetArray() call in
   PCBDDCScatterCoarseDataEnd().
*/
5866: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5867: {
5869:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5870:   PetscScalar    *array;
5871:   Vec            from,to;

5874:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5875:     from = pcbddc->coarse_vec;
5876:     to = pcbddc->vec1_P;
5877:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5878:       Vec tvec;

            /* undo the VecPlaceArray() done on the coarse rhs by PCBDDCScatterCoarseDataEnd() in forward mode */
5880:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5881:       VecResetArray(tvec);
            /* let coarse_vec use the storage of the coarse solution */
5882:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5883:       VecGetArray(tvec,&array);
5884:       VecPlaceArray(from,array);
5885:       VecRestoreArray(tvec,&array);
5886:     }
5887:   } else { /* from local to global -> put data in coarse right hand side */
5888:     from = pcbddc->vec1_P;
5889:     to = pcbddc->coarse_vec;
5890:   }
5891:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5892:   return(0);
5893: }

/*
   PCBDDCScatterCoarseDataEnd - Completes the scatter between pcbddc->vec1_P and pcbddc->coarse_vec
   using pcbddc->coarse_loc_to_glob.

   Input Parameters:
+  pc    - the preconditioner (pc->data is a PC_BDDC)
.  imode - insert mode passed to VecScatterEnd()
-  smode - SCATTER_FORWARD scatters vec1_P into coarse_vec; SCATTER_REVERSE scatters coarse_vec into vec1_P

   Notes:
   When pcbddc->coarse_ksp exists: in forward mode the array of coarse_vec is placed on the
   right-hand side vector of the coarse KSP; in reverse mode the array placed in coarse_vec is reset.
*/
5895: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5896: {
5898:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5899:   PetscScalar    *array;
5900:   Vec            from,to;

5903:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5904:     from = pcbddc->coarse_vec;
5905:     to = pcbddc->vec1_P;
5906:   } else { /* from local to global -> put data in coarse right hand side */
5907:     from = pcbddc->vec1_P;
5908:     to = pcbddc->coarse_vec;
5909:   }
5910:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5911:   if (smode == SCATTER_FORWARD) {
5912:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5913:       Vec tvec;

            /* the coarse rhs shares the storage of coarse_vec (here "to") until VecResetArray() is called on it */
5915:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5916:       VecGetArray(to,&array);
5917:       VecPlaceArray(tvec,array);
5918:       VecRestoreArray(to,&array);
5919:     }
5920:   } else {
5921:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5922:      VecResetArray(from);
5923:     }
5924:   }
5925:   return(0);
5926: }

5928: /* uncomment for testing purposes */
5929: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5930: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5931: {
5932:   PetscErrorCode    ierr;
5933:   PC_IS*            pcis = (PC_IS*)(pc->data);
5934:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5935:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5936:   /* one and zero */
5937:   PetscScalar       one=1.0,zero=0.0;
5938:   /* space to store constraints and their local indices */
5939:   PetscScalar       *constraints_data;
5940:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5941:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5942:   PetscInt          *constraints_n;
5943:   /* iterators */
5944:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5945:   /* BLAS integers */
5946:   PetscBLASInt      lwork,lierr;
5947:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5948:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5949:   /* reuse */
5950:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5951:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5952:   /* change of basis */
5953:   PetscBool         qr_needed;
5954:   PetscBT           change_basis,qr_needed_idx;
5955:   /* auxiliary stuff */
5956:   PetscInt          *nnz,*is_indices;
5957:   PetscInt          ncc;
5958:   /* some quantities */
5959:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5960:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5961:   PetscReal         tol; /* tolerance for retaining eigenmodes */

5964:   tol  = PetscSqrtReal(PETSC_SMALL);
5965:   /* Destroy Mat objects computed previously */
5966:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5967:   MatDestroy(&pcbddc->ConstraintMatrix);
5968:   MatDestroy(&pcbddc->switch_static_change);
5969:   /* save info on constraints from previous setup (if any) */
5970:   olocal_primal_size = pcbddc->local_primal_size;
5971:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5972:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5973:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5974:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5975:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5976:   PetscFree(pcbddc->primal_indices_local_idxs);

5978:   if (!pcbddc->adaptive_selection) {
5979:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5980:     MatNullSpace nearnullsp;
5981:     const Vec    *nearnullvecs;
5982:     Vec          *localnearnullsp;
5983:     PetscScalar  *array;
5984:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5985:     PetscBool    nnsp_has_cnst;
5986:     /* LAPACK working arrays for SVD or POD */
5987:     PetscBool    skip_lapack,boolforchange;
5988:     PetscScalar  *work;
5989:     PetscReal    *singular_vals;
5990: #if defined(PETSC_USE_COMPLEX)
5991:     PetscReal    *rwork;
5992: #endif
5993: #if defined(PETSC_MISSING_LAPACK_GESVD)
5994:     PetscScalar  *temp_basis,*correlation_mat;
5995: #else
5996:     PetscBLASInt dummy_int=1;
5997:     PetscScalar  dummy_scalar=1.;
5998: #endif

6000:     /* Get index sets for faces, edges and vertices from graph */
6001:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6002:     /* print some info */
6003:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6004:       PetscInt nv;

6006:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6007:       ISGetSize(ISForVertices,&nv);
6008:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6009:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6010:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6011:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6012:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6013:       PetscViewerFlush(pcbddc->dbg_viewer);
6014:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6015:     }

6017:     /* free unneeded index sets */
6018:     if (!pcbddc->use_vertices) {
6019:       ISDestroy(&ISForVertices);
6020:     }
6021:     if (!pcbddc->use_edges) {
6022:       for (i=0;i<n_ISForEdges;i++) {
6023:         ISDestroy(&ISForEdges[i]);
6024:       }
6025:       PetscFree(ISForEdges);
6026:       n_ISForEdges = 0;
6027:     }
6028:     if (!pcbddc->use_faces) {
6029:       for (i=0;i<n_ISForFaces;i++) {
6030:         ISDestroy(&ISForFaces[i]);
6031:       }
6032:       PetscFree(ISForFaces);
6033:       n_ISForFaces = 0;
6034:     }

6036:     /* check if near null space is attached to global mat */
6037:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
6038:     if (nearnullsp) {
6039:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6040:       /* remove any stored info */
6041:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
6042:       PetscFree(pcbddc->onearnullvecs_state);
6043:       /* store information for BDDC solver reuse */
6044:       PetscObjectReference((PetscObject)nearnullsp);
6045:       pcbddc->onearnullspace = nearnullsp;
6046:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6047:       for (i=0;i<nnsp_size;i++) {
6048:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6049:       }
6050:     } else { /* if near null space is not provided BDDC uses constants by default */
6051:       nnsp_size = 0;
6052:       nnsp_has_cnst = PETSC_TRUE;
6053:     }
6054:     /* get max number of constraints on a single cc */
6055:     max_constraints = nnsp_size;
6056:     if (nnsp_has_cnst) max_constraints++;

6058:     /*
6059:          Evaluate maximum storage size needed by the procedure
6060:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6061:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6062:          There can be multiple constraints per connected component
6063:                                                                                                                                                            */
6064:     n_vertices = 0;
6065:     if (ISForVertices) {
6066:       ISGetSize(ISForVertices,&n_vertices);
6067:     }
6068:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6069:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

6071:     total_counts = n_ISForFaces+n_ISForEdges;
6072:     total_counts *= max_constraints;
6073:     total_counts += n_vertices;
6074:     PetscBTCreate(total_counts,&change_basis);

6076:     total_counts = 0;
6077:     max_size_of_constraint = 0;
6078:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6079:       IS used_is;
6080:       if (i<n_ISForEdges) {
6081:         used_is = ISForEdges[i];
6082:       } else {
6083:         used_is = ISForFaces[i-n_ISForEdges];
6084:       }
6085:       ISGetSize(used_is,&j);
6086:       total_counts += j;
6087:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6088:     }
6089:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

6091:     /* get local part of global near null space vectors */
6092:     PetscMalloc1(nnsp_size,&localnearnullsp);
6093:     for (k=0;k<nnsp_size;k++) {
6094:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6095:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6096:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6097:     }

6099:     /* whether or not to skip lapack calls */
6100:     skip_lapack = PETSC_TRUE;
6101:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

6103:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6104:     if (!skip_lapack) {
6105:       PetscScalar temp_work;

6107: #if defined(PETSC_MISSING_LAPACK_GESVD)
6108:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6109:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6110:       PetscMalloc1(max_constraints,&singular_vals);
6111:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6112: #if defined(PETSC_USE_COMPLEX)
6113:       PetscMalloc1(3*max_constraints,&rwork);
6114: #endif
6115:       /* now we evaluate the optimal workspace using query with lwork=-1 */
6116:       PetscBLASIntCast(max_constraints,&Blas_N);
6117:       PetscBLASIntCast(max_constraints,&Blas_LDA);
6118:       lwork = -1;
6119:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6120: #if !defined(PETSC_USE_COMPLEX)
6121:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6122: #else
6123:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6124: #endif
6125:       PetscFPTrapPop();
6126:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6127: #else /* on missing GESVD */
6128:       /* SVD */
6129:       PetscInt max_n,min_n;
6130:       max_n = max_size_of_constraint;
6131:       min_n = max_constraints;
6132:       if (max_size_of_constraint < max_constraints) {
6133:         min_n = max_size_of_constraint;
6134:         max_n = max_constraints;
6135:       }
6136:       PetscMalloc1(min_n,&singular_vals);
6137: #if defined(PETSC_USE_COMPLEX)
6138:       PetscMalloc1(5*min_n,&rwork);
6139: #endif
6140:       /* now we evaluate the optimal workspace using query with lwork=-1 */
6141:       lwork = -1;
6142:       PetscBLASIntCast(max_n,&Blas_M);
6143:       PetscBLASIntCast(min_n,&Blas_N);
6144:       PetscBLASIntCast(max_n,&Blas_LDA);
6145:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6146: #if !defined(PETSC_USE_COMPLEX)
6147:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6148: #else
6149:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6150: #endif
6151:       PetscFPTrapPop();
6152:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6153: #endif /* on missing GESVD */
6154:       /* Allocate optimal workspace */
6155:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6156:       PetscMalloc1(lwork,&work);
6157:     }
6158:     /* Now we can loop on constraining sets */
6159:     total_counts = 0;
6160:     constraints_idxs_ptr[0] = 0;
6161:     constraints_data_ptr[0] = 0;
6162:     /* vertices */
6163:     if (n_vertices) {
6164:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6165:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
6166:       for (i=0;i<n_vertices;i++) {
6167:         constraints_n[total_counts] = 1;
6168:         constraints_data[total_counts] = 1.0;
6169:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6170:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6171:         total_counts++;
6172:       }
6173:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6174:       n_vertices = total_counts;
6175:     }

6177:     /* edges and faces */
6178:     total_counts_cc = total_counts;
6179:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6180:       IS        used_is;
6181:       PetscBool idxs_copied = PETSC_FALSE;

6183:       if (ncc<n_ISForEdges) {
6184:         used_is = ISForEdges[ncc];
6185:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6186:       } else {
6187:         used_is = ISForFaces[ncc-n_ISForEdges];
6188:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6189:       }
6190:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6192:       ISGetSize(used_is,&size_of_constraint);
6193:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6194:       /* change of basis should not be performed on local periodic nodes */
6195:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6196:       if (nnsp_has_cnst) {
6197:         PetscScalar quad_value;

6199:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6200:         idxs_copied = PETSC_TRUE;

6202:         if (!pcbddc->use_nnsp_true) {
6203:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6204:         } else {
6205:           quad_value = 1.0;
6206:         }
6207:         for (j=0;j<size_of_constraint;j++) {
6208:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6209:         }
6210:         temp_constraints++;
6211:         total_counts++;
6212:       }
6213:       for (k=0;k<nnsp_size;k++) {
6214:         PetscReal real_value;
6215:         PetscScalar *ptr_to_data;

6217:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6218:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6219:         for (j=0;j<size_of_constraint;j++) {
6220:           ptr_to_data[j] = array[is_indices[j]];
6221:         }
6222:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6223:         /* check if array is null on the connected component */
6224:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6225:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6226:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6227:           temp_constraints++;
6228:           total_counts++;
6229:           if (!idxs_copied) {
6230:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6231:             idxs_copied = PETSC_TRUE;
6232:           }
6233:         }
6234:       }
6235:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6236:       valid_constraints = temp_constraints;
6237:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6238:         if (temp_constraints == 1) { /* just normalize the constraint */
6239:           PetscScalar norm,*ptr_to_data;

6241:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6242:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6243:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6244:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6245:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6246:         } else { /* perform SVD */
6247:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6249: #if defined(PETSC_MISSING_LAPACK_GESVD)
6250:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6251:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6252:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6253:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6254:                 from that computed using LAPACKgesvd
6255:              -> This is due to a different computation of eigenvectors in LAPACKheev
6256:              -> The quality of the POD-computed basis will be the same */
6257:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6258:           /* Store upper triangular part of correlation matrix */
6259:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6260:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6261:           for (j=0;j<temp_constraints;j++) {
6262:             for (k=0;k<j+1;k++) {
6263:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6264:             }
6265:           }
6266:           /* compute eigenvalues and eigenvectors of correlation matrix */
6267:           PetscBLASIntCast(temp_constraints,&Blas_N);
6268:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
6269: #if !defined(PETSC_USE_COMPLEX)
6270:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6271: #else
6272:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6273: #endif
6274:           PetscFPTrapPop();
6275:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6276:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6277:           j = 0;
6278:           while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6279:           total_counts = total_counts-j;
6280:           valid_constraints = temp_constraints-j;
6281:           /* scale and copy POD basis into used quadrature memory */
6282:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6283:           PetscBLASIntCast(temp_constraints,&Blas_N);
6284:           PetscBLASIntCast(temp_constraints,&Blas_K);
6285:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6286:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
6287:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6288:           if (j<temp_constraints) {
6289:             PetscInt ii;
6290:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6291:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6292:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6293:             PetscFPTrapPop();
6294:             for (k=0;k<temp_constraints-j;k++) {
6295:               for (ii=0;ii<size_of_constraint;ii++) {
6296:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6297:               }
6298:             }
6299:           }
6300: #else  /* on missing GESVD */
6301:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6302:           PetscBLASIntCast(temp_constraints,&Blas_N);
6303:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6304:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6305: #if !defined(PETSC_USE_COMPLEX)
6306:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6307: #else
6308:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6309: #endif
6310:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6311:           PetscFPTrapPop();
6312:           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6313:           k = temp_constraints;
6314:           if (k > size_of_constraint) k = size_of_constraint;
6315:           j = 0;
6316:           while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6317:           valid_constraints = k-j;
6318:           total_counts = total_counts-temp_constraints+valid_constraints;
6319: #endif /* on missing GESVD */
6320:         }
6321:       }
6322:       /* update pointers information */
6323:       if (valid_constraints) {
6324:         constraints_n[total_counts_cc] = valid_constraints;
6325:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6326:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6327:         /* set change_of_basis flag */
6328:         if (boolforchange) {
6329:           PetscBTSet(change_basis,total_counts_cc);
6330:         }
6331:         total_counts_cc++;
6332:       }
6333:     }
6334:     /* free workspace */
6335:     if (!skip_lapack) {
6336:       PetscFree(work);
6337: #if defined(PETSC_USE_COMPLEX)
6338:       PetscFree(rwork);
6339: #endif
6340:       PetscFree(singular_vals);
6341: #if defined(PETSC_MISSING_LAPACK_GESVD)
6342:       PetscFree(correlation_mat);
6343:       PetscFree(temp_basis);
6344: #endif
6345:     }
6346:     for (k=0;k<nnsp_size;k++) {
6347:       VecDestroy(&localnearnullsp[k]);
6348:     }
6349:     PetscFree(localnearnullsp);
6350:     /* free index sets of faces, edges and vertices */
6351:     for (i=0;i<n_ISForFaces;i++) {
6352:       ISDestroy(&ISForFaces[i]);
6353:     }
6354:     if (n_ISForFaces) {
6355:       PetscFree(ISForFaces);
6356:     }
6357:     for (i=0;i<n_ISForEdges;i++) {
6358:       ISDestroy(&ISForEdges[i]);
6359:     }
6360:     if (n_ISForEdges) {
6361:       PetscFree(ISForEdges);
6362:     }
6363:     ISDestroy(&ISForVertices);
6364:   } else {
6365:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6367:     total_counts = 0;
6368:     n_vertices = 0;
6369:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6370:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6371:     }
6372:     max_constraints = 0;
6373:     total_counts_cc = 0;
6374:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6375:       total_counts += pcbddc->adaptive_constraints_n[i];
6376:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6377:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6378:     }
6379:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6380:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6381:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6382:     constraints_data = pcbddc->adaptive_constraints_data;
6383:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6384:     PetscMalloc1(total_counts_cc,&constraints_n);
6385:     total_counts_cc = 0;
6386:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6387:       if (pcbddc->adaptive_constraints_n[i]) {
6388:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6389:       }
6390:     }

6392:     max_size_of_constraint = 0;
6393:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6394:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6395:     /* Change of basis */
6396:     PetscBTCreate(total_counts_cc,&change_basis);
6397:     if (pcbddc->use_change_of_basis) {
6398:       for (i=0;i<sub_schurs->n_subs;i++) {
6399:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6400:           PetscBTSet(change_basis,i+n_vertices);
6401:         }
6402:       }
6403:     }
6404:   }
6405:   pcbddc->local_primal_size = total_counts;
6406:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6408:   /* map constraints_idxs in boundary numbering */
6409:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6410:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);

6412:   /* Create constraint matrix */
6413:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6414:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6415:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6417:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6418:   /* determine if a QR strategy is needed for change of basis */
6419:   qr_needed = pcbddc->use_qr_single;
6420:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6421:   total_primal_vertices=0;
6422:   pcbddc->local_primal_size_cc = 0;
6423:   for (i=0;i<total_counts_cc;i++) {
6424:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6425:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6426:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6427:       pcbddc->local_primal_size_cc += 1;
6428:     } else if (PetscBTLookup(change_basis,i)) {
6429:       for (k=0;k<constraints_n[i];k++) {
6430:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6431:       }
6432:       pcbddc->local_primal_size_cc += constraints_n[i];
6433:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6434:         PetscBTSet(qr_needed_idx,i);
6435:         qr_needed = PETSC_TRUE;
6436:       }
6437:     } else {
6438:       pcbddc->local_primal_size_cc += 1;
6439:     }
6440:   }
6441:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6442:   pcbddc->n_vertices = total_primal_vertices;
6443:   /* permute indices in order to have a sorted set of vertices */
6444:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6445:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6446:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6447:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6449:   /* nonzero structure of constraint matrix */
6450:   /* and get reference dof for local constraints */
6451:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6452:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6454:   j = total_primal_vertices;
6455:   total_counts = total_primal_vertices;
6456:   cum = total_primal_vertices;
6457:   for (i=n_vertices;i<total_counts_cc;i++) {
6458:     if (!PetscBTLookup(change_basis,i)) {
6459:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6460:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6461:       cum++;
6462:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6463:       for (k=0;k<constraints_n[i];k++) {
6464:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6465:         nnz[j+k] = size_of_constraint;
6466:       }
6467:       j += constraints_n[i];
6468:     }
6469:   }
6470:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6471:   MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6472:   PetscFree(nnz);

6474:   /* set values in constraint matrix */
6475:   for (i=0;i<total_primal_vertices;i++) {
6476:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6477:   }
6478:   total_counts = total_primal_vertices;
6479:   for (i=n_vertices;i<total_counts_cc;i++) {
6480:     if (!PetscBTLookup(change_basis,i)) {
6481:       PetscInt *cols;

6483:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6484:       cols = constraints_idxs+constraints_idxs_ptr[i];
6485:       for (k=0;k<constraints_n[i];k++) {
6486:         PetscInt    row = total_counts+k;
6487:         PetscScalar *vals;

6489:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6490:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6491:       }
6492:       total_counts += constraints_n[i];
6493:     }
6494:   }
6495:   /* assembling */
6496:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6497:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6498:   MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");

6500:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6501:   if (pcbddc->use_change_of_basis) {
6502:     /* dual and primal dofs on a single cc */
6503:     PetscInt     dual_dofs,primal_dofs;
6504:     /* working stuff for GEQRF */
6505:     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6506:     PetscBLASInt lqr_work;
6507:     /* working stuff for UNGQR */
6508:     PetscScalar  *gqr_work = NULL,lgqr_work_t;
6509:     PetscBLASInt lgqr_work;
6510:     /* working stuff for TRTRS */
6511:     PetscScalar  *trs_rhs = NULL;
6512:     PetscBLASInt Blas_NRHS;
6513:     /* pointers for values insertion into change of basis matrix */
6514:     PetscInt     *start_rows,*start_cols;
6515:     PetscScalar  *start_vals;
6516:     /* working stuff for values insertion */
6517:     PetscBT      is_primal;
6518:     PetscInt     *aux_primal_numbering_B;
6519:     /* matrix sizes */
6520:     PetscInt     global_size,local_size;
6521:     /* temporary change of basis */
6522:     Mat          localChangeOfBasisMatrix;
6523:     /* extra space for debugging */
6524:     PetscScalar  *dbg_work = NULL;

6526:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6527:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6528:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6529:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6530:     /* nonzeros for local mat */
6531:     PetscMalloc1(pcis->n,&nnz);
6532:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6533:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6534:     } else {
6535:       const PetscInt *ii;
6536:       PetscInt       n;
6537:       PetscBool      flg_row;
6538:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6539:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6540:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6541:     }
6542:     for (i=n_vertices;i<total_counts_cc;i++) {
6543:       if (PetscBTLookup(change_basis,i)) {
6544:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6545:         if (PetscBTLookup(qr_needed_idx,i)) {
6546:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6547:         } else {
6548:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6549:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6550:         }
6551:       }
6552:     }
6553:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6554:     MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6555:     PetscFree(nnz);
6556:     /* Set interior change in the matrix */
6557:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6558:       for (i=0;i<pcis->n;i++) {
6559:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6560:       }
6561:     } else {
6562:       const PetscInt *ii,*jj;
6563:       PetscScalar    *aa;
6564:       PetscInt       n;
6565:       PetscBool      flg_row;
6566:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6567:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6568:       for (i=0;i<n;i++) {
6569:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6570:       }
6571:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6572:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6573:     }

6575:     if (pcbddc->dbg_flag) {
6576:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6577:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6578:     }


6581:     /* Now we loop on the constraints which need a change of basis */
6582:     /*
6583:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6584:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6586:        Basic blocks of change of basis matrix T computed by

6588:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6590:             | 1        0   ...        0         s_1/S |
6591:             | 0        1   ...        0         s_2/S |
6592:             |              ...                        |
6593:             | 0        ...            1     s_{n-1}/S |
6594:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6596:             with S = \sum_{i=1}^n s_i^2
6597:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6598:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6600:           - QR decomposition of constraints otherwise
6601:     */
6602:     if (qr_needed && max_size_of_constraint) {
6603:       /* space to store Q */
6604:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6605:       /* array to store scaling factors for reflectors */
6606:       PetscMalloc1(max_constraints,&qr_tau);
6607:       /* first we issue queries for optimal work */
6608:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6609:       PetscBLASIntCast(max_constraints,&Blas_N);
6610:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6611:       lqr_work = -1;
6612:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6613:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6614:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6615:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6616:       lgqr_work = -1;
6617:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6618:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6619:       PetscBLASIntCast(max_constraints,&Blas_K);
6620:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6621:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6622:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6623:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6624:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6625:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6626:       /* array to store rhs and solution of triangular solver */
6627:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6628:       /* allocating workspace for check */
6629:       if (pcbddc->dbg_flag) {
6630:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6631:       }
6632:     }
6633:     /* array to store whether a node is primal or not */
6634:     PetscBTCreate(pcis->n_B,&is_primal);
6635:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6636:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6637:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6638:     for (i=0;i<total_primal_vertices;i++) {
6639:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6640:     }
6641:     PetscFree(aux_primal_numbering_B);

6643:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6644:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6645:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6646:       if (PetscBTLookup(change_basis,total_counts)) {
6647:         /* get constraint info */
6648:         primal_dofs = constraints_n[total_counts];
6649:         dual_dofs = size_of_constraint-primal_dofs;

6651:         if (pcbddc->dbg_flag) {
6652:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6653:         }

6655:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6657:           /* copy quadrature constraints for change of basis check */
6658:           if (pcbddc->dbg_flag) {
6659:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6660:           }
6661:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6662:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6664:           /* compute QR decomposition of constraints */
6665:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6666:           PetscBLASIntCast(primal_dofs,&Blas_N);
6667:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6668:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6669:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6670:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6671:           PetscFPTrapPop();

6673:           /* explicitly compute R^-T */
6674:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6675:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6676:           PetscBLASIntCast(primal_dofs,&Blas_N);
6677:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6678:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6679:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6680:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6681:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6682:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6683:           PetscFPTrapPop();

6685:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6686:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6687:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6688:           PetscBLASIntCast(primal_dofs,&Blas_K);
6689:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6690:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6691:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6692:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6693:           PetscFPTrapPop();

6695:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6696:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6697:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6698:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6699:           PetscBLASIntCast(primal_dofs,&Blas_N);
6700:           PetscBLASIntCast(primal_dofs,&Blas_K);
6701:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6702:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6703:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6704:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6705:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6706:           PetscFPTrapPop();
6707:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6709:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6710:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6711:           /* insert cols for primal dofs */
6712:           for (j=0;j<primal_dofs;j++) {
6713:             start_vals = &qr_basis[j*size_of_constraint];
6714:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6715:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6716:           }
6717:           /* insert cols for dual dofs */
6718:           for (j=0,k=0;j<dual_dofs;k++) {
6719:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6720:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6721:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6722:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6723:               j++;
6724:             }
6725:           }

6727:           /* check change of basis */
6728:           if (pcbddc->dbg_flag) {
6729:             PetscInt   ii,jj;
6730:             PetscBool valid_qr=PETSC_TRUE;
6731:             PetscBLASIntCast(primal_dofs,&Blas_M);
6732:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6733:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6734:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6735:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6736:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6737:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6738:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6739:             PetscFPTrapPop();
6740:             for (jj=0;jj<size_of_constraint;jj++) {
6741:               for (ii=0;ii<primal_dofs;ii++) {
6742:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6743:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6744:               }
6745:             }
6746:             if (!valid_qr) {
6747:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6748:               for (jj=0;jj<size_of_constraint;jj++) {
6749:                 for (ii=0;ii<primal_dofs;ii++) {
6750:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6751:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6752:                   }
6753:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6754:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6755:                   }
6756:                 }
6757:               }
6758:             } else {
6759:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6760:             }
6761:           }
6762:         } else { /* simple transformation block */
6763:           PetscInt    row,col;
6764:           PetscScalar val,norm;

6766:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6767:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6768:           for (j=0;j<size_of_constraint;j++) {
6769:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6770:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6771:             if (!PetscBTLookup(is_primal,row_B)) {
6772:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6773:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6774:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6775:             } else {
6776:               for (k=0;k<size_of_constraint;k++) {
6777:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6778:                 if (row != col) {
6779:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6780:                 } else {
6781:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6782:                 }
6783:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6784:               }
6785:             }
6786:           }
6787:           if (pcbddc->dbg_flag) {
6788:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6789:           }
6790:         }
6791:       } else {
6792:         if (pcbddc->dbg_flag) {
6793:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6794:         }
6795:       }
6796:     }

6798:     /* free workspace */
6799:     if (qr_needed) {
6800:       if (pcbddc->dbg_flag) {
6801:         PetscFree(dbg_work);
6802:       }
6803:       PetscFree(trs_rhs);
6804:       PetscFree(qr_tau);
6805:       PetscFree(qr_work);
6806:       PetscFree(gqr_work);
6807:       PetscFree(qr_basis);
6808:     }
6809:     PetscBTDestroy(&is_primal);
6810:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6811:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6813:     /* assembling of global change of variable */
6814:     if (!pcbddc->fake_change) {
6815:       Mat      tmat;
6816:       PetscInt bs;

6818:       VecGetSize(pcis->vec1_global,&global_size);
6819:       VecGetLocalSize(pcis->vec1_global,&local_size);
6820:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6821:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6822:       MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6823:       MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6824:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6825:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6826:       MatGetBlockSize(pc->pmat,&bs);
6827:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6828:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6829:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6830:       MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6831:       MatDestroy(&tmat);
6832:       VecSet(pcis->vec1_global,0.0);
6833:       VecSet(pcis->vec1_N,1.0);
6834:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6835:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6836:       VecReciprocal(pcis->vec1_global);
6837:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6839:       /* check */
6840:       if (pcbddc->dbg_flag) {
6841:         PetscReal error;
6842:         Vec       x,x_change;

6844:         VecDuplicate(pcis->vec1_global,&x);
6845:         VecDuplicate(pcis->vec1_global,&x_change);
6846:         VecSetRandom(x,NULL);
6847:         VecCopy(x,pcis->vec1_global);
6848:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6849:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6850:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6851:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6852:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6853:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6854:         VecAXPY(x,-1.0,x_change);
6855:         VecNorm(x,NORM_INFINITY,&error);
6856:         if (error > PETSC_SMALL) {
6857:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6858:         }
6859:         VecDestroy(&x);
6860:         VecDestroy(&x_change);
6861:       }
6862:       /* adapt sub_schurs computed (if any) */
6863:       if (pcbddc->use_deluxe_scaling) {
6864:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6866:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6867:         if (sub_schurs && sub_schurs->S_Ej_all) {
6868:           Mat                    S_new,tmat;
6869:           IS                     is_all_N,is_V_Sall = NULL;

6871:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6872:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6873:           if (pcbddc->deluxe_zerorows) {
6874:             ISLocalToGlobalMapping NtoSall;
6875:             IS                     is_V;
6876:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6877:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6878:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6879:             ISLocalToGlobalMappingDestroy(&NtoSall);
6880:             ISDestroy(&is_V);
6881:           }
6882:           ISDestroy(&is_all_N);
6883:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6884:           MatDestroy(&sub_schurs->S_Ej_all);
6885:           PetscObjectReference((PetscObject)S_new);
6886:           if (pcbddc->deluxe_zerorows) {
6887:             const PetscScalar *array;
6888:             const PetscInt    *idxs_V,*idxs_all;
6889:             PetscInt          i,n_V;

6891:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6892:             ISGetLocalSize(is_V_Sall,&n_V);
6893:             ISGetIndices(is_V_Sall,&idxs_V);
6894:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6895:             VecGetArrayRead(pcis->D,&array);
6896:             for (i=0;i<n_V;i++) {
6897:               PetscScalar val;
6898:               PetscInt    idx;

6900:               idx = idxs_V[i];
6901:               val = array[idxs_all[idxs_V[i]]];
6902:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6903:             }
6904:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6905:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6906:             VecRestoreArrayRead(pcis->D,&array);
6907:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6908:             ISRestoreIndices(is_V_Sall,&idxs_V);
6909:           }
6910:           sub_schurs->S_Ej_all = S_new;
6911:           MatDestroy(&S_new);
6912:           if (sub_schurs->sum_S_Ej_all) {
6913:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6914:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6915:             PetscObjectReference((PetscObject)S_new);
6916:             if (pcbddc->deluxe_zerorows) {
6917:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6918:             }
6919:             sub_schurs->sum_S_Ej_all = S_new;
6920:             MatDestroy(&S_new);
6921:           }
6922:           ISDestroy(&is_V_Sall);
6923:           MatDestroy(&tmat);
6924:         }
6925:         /* destroy any change of basis context in sub_schurs */
6926:         if (sub_schurs && sub_schurs->change) {
6927:           PetscInt i;

6929:           for (i=0;i<sub_schurs->n_subs;i++) {
6930:             KSPDestroy(&sub_schurs->change[i]);
6931:           }
6932:           PetscFree(sub_schurs->change);
6933:         }
6934:       }
6935:       if (pcbddc->switch_static) { /* need to save the local change */
6936:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6937:       } else {
6938:         MatDestroy(&localChangeOfBasisMatrix);
6939:       }
6940:       /* determine if any process has changed the pressures locally */
6941:       pcbddc->change_interior = pcbddc->benign_have_null;
6942:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6943:       MatDestroy(&pcbddc->ConstraintMatrix);
6944:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6945:       pcbddc->use_qr_single = qr_needed;
6946:     }
6947:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6948:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6949:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6950:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6951:     } else {
6952:       Mat benign_global = NULL;
6953:       if (pcbddc->benign_have_null) {
6954:         Mat M;

6956:         pcbddc->change_interior = PETSC_TRUE;
6957:         VecCopy(matis->counter,pcis->vec1_N);
6958:         VecReciprocal(pcis->vec1_N);
6959:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
6960:         if (pcbddc->benign_change) {
6961:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6962:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6963:         } else {
6964:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
6965:           MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
6966:         }
6967:         MatISSetLocalMat(benign_global,M);
6968:         MatDestroy(&M);
6969:         MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
6970:         MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
6971:       }
6972:       if (pcbddc->user_ChangeOfBasisMatrix) {
6973:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6974:         MatDestroy(&benign_global);
6975:       } else if (pcbddc->benign_have_null) {
6976:         pcbddc->ChangeOfBasisMatrix = benign_global;
6977:       }
6978:     }
6979:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6980:       IS             is_global;
6981:       const PetscInt *gidxs;

6983:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6984:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6985:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6986:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6987:       ISDestroy(&is_global);
6988:     }
6989:   }
6990:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6991:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6992:   }

6994:   if (!pcbddc->fake_change) {
6995:     /* add pressure dofs to set of primal nodes for numbering purposes */
6996:     for (i=0;i<pcbddc->benign_n;i++) {
6997:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6998:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6999:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7000:       pcbddc->local_primal_size_cc++;
7001:       pcbddc->local_primal_size++;
7002:     }

7004:     /* check if a new primal space has been introduced (also take into account benign trick) */
7005:     pcbddc->new_primal_space_local = PETSC_TRUE;
7006:     if (olocal_primal_size == pcbddc->local_primal_size) {
7007:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7008:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7009:       if (!pcbddc->new_primal_space_local) {
7010:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7011:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7012:       }
7013:     }
7014:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7015:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7016:   }
7017:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

7019:   /* flush dbg viewer */
7020:   if (pcbddc->dbg_flag) {
7021:     PetscViewerFlush(pcbddc->dbg_viewer);
7022:   }

7024:   /* free workspace */
7025:   PetscBTDestroy(&qr_needed_idx);
7026:   PetscBTDestroy(&change_basis);
7027:   if (!pcbddc->adaptive_selection) {
7028:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7029:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7030:   } else {
7031:     PetscFree5(pcbddc->adaptive_constraints_n,
7032:                       pcbddc->adaptive_constraints_idxs_ptr,
7033:                       pcbddc->adaptive_constraints_data_ptr,
7034:                       pcbddc->adaptive_constraints_idxs,
7035:                       pcbddc->adaptive_constraints_data);
7036:     PetscFree(constraints_n);
7037:     PetscFree(constraints_idxs_B);
7038:   }
7039:   return(0);
7040: }
7041: /* #undef PETSC_MISSING_LAPACK_GESVD */

/*
   PCBDDCAnalyzeInterface - (re)builds the local BDDC graph held in pcbddc->mat_graph
   when pcbddc->recompute_topography is set, then runs the connected-components
   analysis if the graph has not been analyzed yet.

   Input Parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS
        (no type check is visible here -- NOTE(review): confirm callers guarantee a MATIS pmat)

   Notes:
   Size mismatches (user CSR graph, user coordinates) are reported through SETERRQ2.
   The visible success path ends in return(0).
   NOTE(review): ierr is declared but never referenced in this listing, and no call
   is error-checked in the visible text; the numbering gaps suggest lines were elided.
*/
7043: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7044: {
7045:   ISLocalToGlobalMapping map;
7046:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7047:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
7048:   PetscInt               i,N;
        /* rcsr: set when this function installs a default CSR graph; triggers the nvtxs_csr reset before returning */
7049:   PetscBool              rcsr = PETSC_FALSE;
7050:   PetscErrorCode         ierr;

        /* The whole graph construction is skipped when the topography does not need recomputing */
7053:   if (pcbddc->recompute_topography) {
          /* force the connected-components analysis below to run again */
7054:     pcbddc->graphanalyzed = PETSC_FALSE;
7055:     /* Reset previously computed graph */
7056:     PCBDDCGraphReset(pcbddc->mat_graph);
7057:     /* Init local Graph struct */
          /* N is the row size reported by MatGetSize on pc->pmat; it is also used to size local_subs below */
7058:     MatGetSize(pc->pmat,&N,NULL);
7059:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7060:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* Only the local IS of user primal vertices is set: run the consistency check with MPI_LOR
             (presumably this makes the set agree across subdomains sharing dofs -- confirm in PCBDDCConsistencyCheckIS) */
7062:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7063:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7064:     }
7065:     /* Check validity of the csr graph passed in by the user */
7066:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

7068:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7069:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7070:       PetscInt  *xadj,*adjncy;
7071:       PetscInt  nvtxs;
7072:       PetscBool flg_row=PETSC_FALSE;

            /* take the row structure of the local matrix matis->A; the arrays are copied (PETSC_COPY_VALUES)
               so they can be handed back to the matrix right after */
7074:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7075:       if (flg_row) {
7076:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7077:         pcbddc->computed_rowadj = PETSC_TRUE;
7078:       }
7079:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* note: set even when flg_row is false, i.e. also when no graph was actually installed */
7080:       rcsr = PETSC_TRUE;
7081:     }
7082:     if (pcbddc->dbg_flag) {
7083:       PetscViewerFlush(pcbddc->dbg_viewer);
7084:     }

          /* Coordinates were given (cdim != 0) but are not flagged as local (cloc false):
             move them to the local numbering with a broadcast over matis->sf */
7086:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7087:       PetscReal    *lcoords;
7088:       PetscInt     n;
7089:       MPI_Datatype dimrealtype;

7091:       /* TODO: support for blocked */
7092:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7093:       MatGetLocalSize(matis->A,&n,NULL);
            /* cdim reals for each of the n rows of the local matrix */
7094:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
            /* one SF unit = cdim contiguous reals, so each point's coordinates travel together */
7095:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7096:       MPI_Type_commit(&dimrealtype);
7097:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7098:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7099:       MPI_Type_free(&dimrealtype);
            /* the old array is released and replaced by the local one */
7100:       PetscFree(pcbddc->mat_graph->coords);

7102:       pcbddc->mat_graph->coords = lcoords;
7103:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7104:       pcbddc->mat_graph->cnloc  = n;
7105:     }
          /* when coordinates are present, their count must match the number of graph vertices */
7106:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
          /* coordinates are marked active only if corner selection is requested and has not been done yet */
7107:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);

7109:     /* Setup of Graph */
7110:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7111:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

7113:     /* attach info on disconnected subdomains if present */
7114:     if (pcbddc->n_local_subs) {
7115:       PetscInt *local_subs;

            /* local_subs[dof] = index of the local sub containing that dof.
               NOTE(review): the array has N entries (size from MatGetSize on pc->pmat) and only the entries
               listed in pcbddc->local_subs[] are written; any index not covered by an IS stays uninitialized --
               confirm the ISs cover every index the graph reads */
7117:       PetscMalloc1(N,&local_subs);
7118:       for (i=0;i<pcbddc->n_local_subs;i++) {
7119:         const PetscInt *idxs;
7120:         PetscInt       nl,j;

7122:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
7123:         ISGetIndices(pcbddc->local_subs[i],&idxs);
7124:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7125:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7126:       }
            /* the pointer is stored in the graph and not freed in this function */
7127:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
7128:       pcbddc->mat_graph->local_subs = local_subs;
7129:     }
7130:   }

        /* Runs after a rebuild (flag cleared above) or whenever the flag is otherwise unset on entry */
7132:   if (!pcbddc->graphanalyzed) {
7133:     /* Graph's connected components analysis */
7134:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7135:     pcbddc->graphanalyzed = PETSC_TRUE;
          /* record that corner selection (if requested) has now been performed; feeds active_coords on the next rebuild */
7136:     pcbddc->corner_selected = pcbddc->corner_selection;
7137:   }
        /* zero the CSR vertex count when the default-adjacency branch above was taken */
7138:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7139:   return(0);
7140: }

/*
   PCBDDCOrthonormalizeVecs - orthonormalizes, in place, the first n vectors of vecs[]
   with a single Gram-Schmidt sweep: vecs[i] is projected against vecs[0..i-1]
   (one VecMDot followed by one VecMAXPY, no re-orthogonalization) and then normalized.

   Input Parameters:
+  n    - number of vectors; the function returns immediately when n is 0
-  vecs - the vectors, overwritten with the result

   Notes:
   A vector whose norm, as returned by VecNormalize, is below PETSC_SMALL is set to zero.
   Such a vector is treated as dependent on the previous ones and contributes nothing to later projections.
   The visible paths return 0.
   NOTE(review): listing lines 7147-7149 are not shown, and no call is error-checked in the visible text.
*/
7142: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7143: {
7144:   PetscInt       i,j;
7145:   PetscScalar    *alphas;
7146:   PetscReal      norm;

7150:   if (!n) return(0);
        /* workspace for the projection coefficients; at most n-1 entries are used */
7151:   PetscMalloc1(n,&alphas);
        /* first vector: nothing to project against, just normalize (or zero it if negligible) */
7152:   VecNormalize(vecs[0],&norm);
7153:   if (norm < PETSC_SMALL) {
7154:     VecSet(vecs[0],0.0);
7155:   }
7156:   for (i=1;i<n;i++) {
          /* dot products of vecs[i] with the i vectors already processed */
7157:     VecMDot(vecs[i],i,vecs,alphas);
          /* negate (and conjugate; a no-op for real scalars) so that the VecMAXPY below subtracts the projections */
7158:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7159:     VecMAXPY(vecs[i],i,alphas,vecs);
7160:     VecNormalize(vecs[i],&norm);
          /* what is left after projection is negligible: drop the vector */
7161:     if (norm < PETSC_SMALL) {
7162:       VecSet(vecs[i],0.0);
7163:     }
7164:   }
7165:   PetscFree(alphas);
7166:   return(0);
7167: }

/*
   PCBDDCMatISGetSubassemblingPattern - Computes, for every process of a MATIS matrix, the rank
   its local problem should be sent to when the matrix is subassembled on fewer subdomains.

   Input Parameters:
+  mat          - the matrix; an error is raised if it is not of type MATIS
.  n_subdomains - requested number of target subdomains (must be positive)
-  redprocs     - if positive and smaller than the number of processes taking part in the
                  partitioning, the connectivity graph of the subdomains is stored as an AIJ
                  matrix whose rows are owned by the first redprocs of those processes

   Output Parameters:
+  n_subdomains - may be modified: it is capped by the number of processes with an empty local
                  matrix (if any) and by the number of processes taking part in the partitioning;
                  in the trivial "shift" case it is set to the number of active processes
.  is_sends     - IS created on the communicator of mat; its local size is 1 (the destination
                  rank) on processes with a non-empty local matrix and 0 on the others
-  have_void    - optional (can be NULL); PETSC_TRUE if at least one process has an empty
                  local matrix, PETSC_FALSE otherwise

   Notes:
   The options -matis_partitioning_use_vwgt and -matis_partitioning_threshold are read from the
   options database. The function contains collective calls on the communicator of mat.
*/
7169: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7170: {
7171:   Mat            A;
7172:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
7173:   PetscMPIInt    size,rank,color;
7174:   PetscInt       *xadj,*adjncy;
7175:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7176:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
7177:   PetscInt       void_procs,*procs_candidates = NULL;
7178:   PetscInt       xadj_count,*count;
7179:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
7180:   PetscSubcomm   psubcomm;
7181:   MPI_Comm       subcomm;

7186:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7187:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7190:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);

7192:   if (have_void) *have_void = PETSC_FALSE;
7193:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7194:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7195:   MatISGetLocalMat(mat,&A);
7196:   MatGetLocalSize(A,&n,NULL);
        /* a process is "active" when its local matrix has at least one row */
7197:   im_active = !!n;
7198:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7199:   void_procs = size - active_procs;
7200:   /* get ranks of non-active processes in mat communicator */
7201:   if (void_procs) {
7202:     PetscInt ncand;

7204:     if (have_void) *have_void = PETSC_TRUE;
7205:     PetscMalloc1(size,&procs_candidates);
          /* gather the activity flag of every rank, then compact in place the ranks of the
             non-active processes at the beginning of procs_candidates */
7206:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7207:     for (i=0,ncand=0;i<size;i++) {
7208:       if (!procs_candidates[i]) {
7209:         procs_candidates[ncand++] = i;
7210:       }
7211:     }
7212:     /* force n_subdomains to be not greater than the number of non-active processes */
7213:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7214:   }

7216:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7217:      number of subdomains requested 1 -> send to master or first candidate in voids  */
7218:   MatGetSize(mat,&N,NULL);
7219:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7220:     PetscInt issize,isidx,dest;
7221:     if (*n_subdomains == 1) dest = 0;
7222:     else dest = rank;
7223:     if (im_active) {
7224:       issize = 1;
7225:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7226:         isidx = procs_candidates[dest];
7227:       } else {
7228:         isidx = dest;
7229:       }
7230:     } else {
            /* non-active processes contribute no entry to the IS; isidx is a placeholder */
7231:       issize = 0;
7232:       isidx = -1;
7233:     }
7234:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7235:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7236:     PetscFree(procs_candidates);
7237:     return(0);
7238:   }
7239:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7240:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
        /* the threshold is never allowed to be smaller than 2 */
7241:   threshold = PetscMax(threshold,2);

7243:   /* Get info on mapping */
7244:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7246:   /* build local CSR graph of subdomains' connectivity */
        /* this process owns a single vertex of the graph; at most n_neighs-1 edges leave it */
7247:   PetscMalloc1(2,&xadj);
7248:   xadj[0] = 0;
7249:   xadj[1] = PetscMax(n_neighs-1,0);
7250:   PetscMalloc1(xadj[1],&adjncy);
7251:   PetscMalloc1(xadj[1],&adjncy_wgt);
        /* count[k] = number of entries 1..n_neighs-1 of the mapping info that share local index k;
           entry 0 is skipped (presumably it refers to the process itself - confirm against
           ISLocalToGlobalMappingGetInfo) */
7252:   PetscCalloc1(n,&count);
7253:   for (i=1;i<n_neighs;i++)
7254:     for (j=0;j<n_shared[i];j++)
7255:       count[shared[i][j]] += 1;

        /* a neighbour becomes an edge only if at least one of the indices shared with it is
           shared with fewer than threshold neighbours; the edge weight is the number of
           indices shared with that neighbour */
7257:   xadj_count = 0;
7258:   for (i=1;i<n_neighs;i++) {
7259:     for (j=0;j<n_shared[i];j++) {
7260:       if (count[shared[i][j]] < threshold) {
7261:         adjncy[xadj_count] = neighs[i];
7262:         adjncy_wgt[xadj_count] = n_shared[i];
7263:         xadj_count++;
7264:         break;
7265:       }
7266:     }
7267:   }
7268:   xadj[1] = xadj_count;
7269:   PetscFree(count);
7270:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7271:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

        /* single-entry result buffer; it is handed over to is_sends (PETSC_OWN_POINTER) at the end */
7273:   PetscMalloc1(1,&ranks_send_to_idx);

7275:   /* Restrict work on active processes only */
7276:   PetscMPIIntCast(im_active,&color);
7277:   if (void_procs) {
7278:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7279:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7280:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7281:     subcomm = PetscSubcommChild(psubcomm);
7282:   } else {
7283:     psubcomm = NULL;
7284:     subcomm = PetscObjectComm((PetscObject)mat);
7285:   }

7287:   v_wgt = NULL;
7288:   if (!color) {
          /* non-active process: it takes no part in the partitioning, just release the graph */
7289:     PetscFree(xadj);
7290:     PetscFree(adjncy);
7291:     PetscFree(adjncy_wgt);
7292:   } else {
7293:     Mat             subdomain_adj;
7294:     IS              new_ranks,new_ranks_contig;
7295:     MatPartitioning partitioner;
7296:     PetscInt        rstart=0,rend=0;
7297:     PetscInt        *is_indices,*oldranks;
7298:     PetscMPIInt     size;
7299:     PetscBool       aggregate;

          /* from here on, size (shadowing the outer variable) is the size of subcomm */
7301:     MPI_Comm_size(subcomm,&size);
7302:     if (void_procs) {
7303:       PetscInt prank = rank;
            /* oldranks[k] = rank, in the communicator of mat, of the k-th process of subcomm;
               the neighbour ranks stored in adjncy are replaced by their position in oldranks */
7304:       PetscMalloc1(size,&oldranks);
7305:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7306:       for (i=0;i<xadj[1];i++) {
7307:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7308:       }
7309:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7310:     } else {
7311:       oldranks = NULL;
7312:     }
7313:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7314:     if (aggregate) { /* TODO: all this part could be made more efficient */
7315:       PetscInt    lrows,row,ncols,*cols;
7316:       PetscMPIInt nrank;
7317:       PetscScalar *vals;

7319:       MPI_Comm_rank(subcomm,&nrank);
            /* the size x size adjacency matrix has its rows distributed among the first
               redprocs processes of subcomm only; the others own no rows */
7320:       lrows = 0;
7321:       if (nrank<redprocs) {
7322:         lrows = size/redprocs;
7323:         if (nrank<size%redprocs) lrows++;
7324:       }
7325:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7326:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7327:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7328:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
            /* every process inserts the row corresponding to its own rank in subcomm */
7329:       row = nrank;
7330:       ncols = xadj[1]-xadj[0];
7331:       cols = adjncy;
7332:       PetscMalloc1(ncols,&vals);
7333:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7334:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7335:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7336:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7337:       PetscFree(xadj);
7338:       PetscFree(adjncy);
7339:       PetscFree(adjncy_wgt);
7340:       PetscFree(vals);
7341:       if (use_vwgt) {
7342:         Vec               v;
7343:         const PetscScalar *array;
7344:         PetscInt          nl;

              /* vertex weights (local sizes n) are moved to the owners of the matrix rows by
                 assembling them into a vector with the same layout as subdomain_adj */
7346:         MatCreateVecs(subdomain_adj,&v,NULL);
7347:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7348:         VecAssemblyBegin(v);
7349:         VecAssemblyEnd(v);
7350:         VecGetLocalSize(v,&nl);
7351:         VecGetArrayRead(v,&array);
7352:         PetscMalloc1(nl,&v_wgt);
7353:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7354:         VecRestoreArrayRead(v,&array);
7355:         VecDestroy(&v);
7356:       }
7357:     } else {
            /* one graph vertex per process; xadj, adjncy and adjncy_wgt are not freed in this
               function after this call (presumably the matrix takes ownership - confirm
               against MatCreateMPIAdj) */
7358:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7359:       if (use_vwgt) {
7360:         PetscMalloc1(1,&v_wgt);
7361:         v_wgt[0] = n;
7362:       }
7363:     }
7364:     /* MatView(subdomain_adj,0); */

7366:     /* Partition */
7367:     MatPartitioningCreate(subcomm,&partitioner);
7368: #if defined(PETSC_HAVE_PTSCOTCH)
7369:     MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7370: #elif defined(PETSC_HAVE_PARMETIS)
7371:     MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7372: #else
7373:     MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7374: #endif
7375:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
          /* v_wgt is not freed in this function (presumably the partitioner takes ownership -
             confirm against MatPartitioningSetVertexWeights) */
7376:     if (v_wgt) {
7377:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7378:     }
7379:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7380:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7381:     MatPartitioningSetFromOptions(partitioner);
7382:     MatPartitioningApply(partitioner,&new_ranks);
7383:     /* MatPartitioningView(partitioner,0); */

7385:     /* renumber new_ranks to avoid "holes" in new set of processors */
7386:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7387:     ISDestroy(&new_ranks);
7388:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7389:     if (!aggregate) {
            /* each process owns exactly one vertex: its part is is_indices[0] */
7390:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7391: #if defined(PETSC_USE_DEBUG)
7392:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7393: #endif
7394:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7395:       } else if (oldranks) {
7396:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7397:       } else {
7398:         ranks_send_to_idx[0] = is_indices[0];
7399:       }
7400:     } else {
7401:       PetscInt    idx = 0;
7402:       PetscMPIInt tag;
7403:       MPI_Request *reqs;

            /* the owner of rows rstart..rend-1 sends to process i of subcomm the part computed
               for row i; every process of subcomm receives its own part in idx */
7405:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7406:       PetscMalloc1(rend-rstart,&reqs);
7407:       for (i=rstart;i<rend;i++) {
7408:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7409:       }
7410:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7411:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7412:       PetscFree(reqs);
7413:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7414: #if defined(PETSC_USE_DEBUG)
7415:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7416: #endif
7417:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7418:       } else if (oldranks) {
7419:         ranks_send_to_idx[0] = oldranks[idx];
7420:       } else {
7421:         ranks_send_to_idx[0] = idx;
7422:       }
7423:     }
7424:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7425:     /* clean up */
7426:     PetscFree(oldranks);
7427:     ISDestroy(&new_ranks_contig);
7428:     MatDestroy(&subdomain_adj);
7429:     MatPartitioningDestroy(&partitioner);
7430:   }
7431:   PetscSubcommDestroy(&psubcomm);
7432:   PetscFree(procs_candidates);

7434:   /* assemble parallel IS for sends */
        /* active processes contribute one entry, non-active processes none */
7435:   i = 1;
7436:   if (!color) i=0;
7437:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7438:   return(0);
7439: }

/* Tags for the kind of local matrix being exchanged; PCBDDCMatISSubassemble stores one of
   them as the first entry of the index buffer it sends. */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE   = 1,
  MATBAIJ_PRIVATE  = 2,
  MATSBAIJ_PRIVATE = 3
} MatTypePrivate;

/*
   PCBDDCMatISSubassemble - Builds a new MATIS matrix by sending the local matrix of every
   process to a target process and summing, on each receiver, all the local matrices received.

   Input Parameters:
+  mat           - the matrix to subassemble; it must be of type MATIS with square local
                   matrices of type MATSEQDENSE, otherwise an error is raised
.  is_sends      - ranks (in the communicator of mat) this process sends its local matrix to;
                   if NULL, it is computed by PCBDDCMatISGetSubassemblingPattern()
.  n_subdomains  - target number of subdomains; used only when is_sends is NULL, in which case
                   it must be nonzero
.  restrict_comm - if true, the new matrix is created on a subcommunicator that excludes some
                   of the processes
.  restrict_full - used with restrict_comm: if true, every process receiving nothing is
                   excluded; if false, only those that send but do not receive are excluded
.  reuse         - if true and *mat_n is not NULL, *mat_n is reused; it must be a MATIS with
                   the same global sizes as mat
.  nis           - number of additional index sets in isarray
-  nvecs         - number of vectors in nnsp_vec (at most 1 is supported)

   Input/Output Parameters:
+  mat_n    - the subassembled matrix; set to NULL on the processes excluded from the
              restricted communicator
.  isarray  - each IS is destroyed and replaced by the sorted, duplicate-free union of the
              indices received (left destroyed on excluded processes)
-  nnsp_vec - nnsp_vec[0] is destroyed and replaced by a vector holding the sum of the values
              received (left destroyed on excluded processes)
*/
7443: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7444: {
7445:   Mat                    local_mat;
7446:   IS                     is_sends_internal;
7447:   PetscInt               rows,cols,new_local_rows;
7448:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7449:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7450:   ISLocalToGlobalMapping l2gmap;
7451:   PetscInt*              l2gmap_indices;
7452:   const PetscInt*        is_indices;
7453:   MatType                new_local_type;
7454:   /* buffers */
7455:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7456:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7457:   PetscInt               *recv_buffer_idxs_local;
7458:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7459:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7460:   /* MPI */
7461:   MPI_Comm               comm,comm_n;
7462:   PetscSubcomm           subcomm;
7463:   PetscMPIInt            n_sends,n_recvs,size;
7464:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7465:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7466:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7467:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7468:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7469:   PetscErrorCode         ierr;

7473:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7474:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7481:   if (nvecs) {
7482:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7484:   }
7485:   /* further checks */
7486:   MatISGetLocalMat(mat,&local_mat);
7487:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7488:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7489:   MatGetSize(local_mat,&rows,&cols);
7490:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7491:   if (reuse && *mat_n) {
7492:     PetscInt mrows,mcols,mnrows,mncols;
7494:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7495:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7496:     MatGetSize(mat,&mrows,&mcols);
7497:     MatGetSize(*mat_n,&mnrows,&mncols);
7498:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7499:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7500:   }
7501:   MatGetBlockSize(local_mat,&bs);

7504:   /* prepare IS for sending if not provided */
7505:   if (!is_sends) {
7506:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7507:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7508:   } else {
          /* take a reference so that is_sends_internal can be destroyed below in both cases */
7509:     PetscObjectReference((PetscObject)is_sends);
7510:     is_sends_internal = is_sends;
7511:   }

7513:   /* get comm */
7514:   PetscObjectGetComm((PetscObject)mat,&comm);

7516:   /* compute number of sends */
7517:   ISGetLocalSize(is_sends_internal,&i);
7518:   PetscMPIIntCast(i,&n_sends);

7520:   /* compute number of receives */
        /* iflags[r] is set to 1 for every rank r this process sends to */
7521:   MPI_Comm_size(comm,&size);
7522:   PetscMalloc1(size,&iflags);
7523:   PetscMemzero(iflags,size*sizeof(*iflags));
7524:   ISGetIndices(is_sends_internal,&is_indices);
7525:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7526:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7527:   PetscFree(iflags);

7529:   /* restrict comm if requested */
7530:   subcomm = 0;
7531:   destroy_mat = PETSC_FALSE;
7532:   if (restrict_comm) {
7533:     PetscMPIInt color,subcommsize;

          /* color 1 marks the processes that are left out of the new communicator */
7535:     color = 0;
7536:     if (restrict_full) {
7537:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7538:     } else {
7539:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7540:     }
7541:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7542:     subcommsize = size - subcommsize;
7543:     /* check if reuse has been requested */
7544:     if (reuse) {
7545:       if (*mat_n) {
7546:         PetscMPIInt subcommsize2;
7547:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7548:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7549:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7550:       } else {
              /* nothing to reuse on this process: the objects created below live on PETSC_COMM_SELF */
7551:         comm_n = PETSC_COMM_SELF;
7552:       }
7553:     } else { /* MAT_INITIAL_MATRIX */
7554:       PetscMPIInt rank;

7556:       MPI_Comm_rank(comm,&rank);
7557:       PetscSubcommCreate(comm,&subcomm);
7558:       PetscSubcommSetNumber(subcomm,2);
7559:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7560:       comm_n = PetscSubcommChild(subcomm);
7561:     }
7562:     /* flag to destroy *mat_n if not significant */
7563:     if (color) destroy_mat = PETSC_TRUE;
7564:   } else {
7565:     comm_n = comm;
7566:   }

7568:   /* prepare send/receive buffers */
        /* ilengths_*[r] = length of the message sent to rank r (zero when nothing is sent) */
7569:   PetscMalloc1(size,&ilengths_idxs);
7570:   PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));
7571:   PetscMalloc1(size,&ilengths_vals);
7572:   PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));
7573:   if (nis) {
7574:     PetscCalloc1(size,&ilengths_idxs_is);
7575:   }

7577:   /* Get data from local matrices */
7578:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7579:     /* TODO: See below some guidelines on how to prepare the local buffers */
7580:     /*
7581:        send_buffer_vals should contain the raw values of the local matrix
7582:        send_buffer_idxs should contain:
7583:        - MatType_PRIVATE type
7584:        - PetscInt        size_of_l2gmap
7585:        - PetscInt        global_row_indices[size_of_l2gmap]
7586:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7587:     */
7588:   else {
          /* dense case: the values message is the raw array of the local matrix (len*len entries),
             the index message is [type tag, len, global indices of the local-to-global map] */
7589:     MatDenseGetArray(local_mat,&send_buffer_vals);
7590:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7591:     PetscMalloc1(i+2,&send_buffer_idxs);
7592:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7593:     send_buffer_idxs[1] = i;
7594:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7595:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7596:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7597:     PetscMPIIntCast(i,&len);
7598:     for (i=0;i<n_sends;i++) {
7599:       ilengths_vals[is_indices[i]] = len*len;
7600:       ilengths_idxs[is_indices[i]] = len+2;
7601:     }
7602:   }
7603:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7604:   /* additional is (if any) */
7605:   if (nis) {
7606:     PetscMPIInt psum;
7607:     PetscInt j;
          /* all the index sets are packed in one message: [size_0, indices_0, size_1, indices_1, ...] */
7608:     for (j=0,psum=0;j<nis;j++) {
7609:       PetscInt plen;
7610:       ISGetLocalSize(isarray[j],&plen);
7611:       PetscMPIIntCast(plen,&len);
7612:       psum += len+1; /* indices + length */
7613:     }
7614:     PetscMalloc1(psum,&send_buffer_idxs_is);
7615:     for (j=0,psum=0;j<nis;j++) {
7616:       PetscInt plen;
7617:       const PetscInt *is_array_idxs;
7618:       ISGetLocalSize(isarray[j],&plen);
7619:       send_buffer_idxs_is[psum] = plen;
7620:       ISGetIndices(isarray[j],&is_array_idxs);
7621:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7622:       ISRestoreIndices(isarray[j],&is_array_idxs);
7623:       psum += plen+1; /* indices + length */
7624:     }
7625:     for (i=0;i<n_sends;i++) {
7626:       ilengths_idxs_is[is_indices[i]] = psum;
7627:     }
7628:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7629:   }
7630:   MatISRestoreLocalMat(mat,&local_mat);

        /* total sizes of the receive buffers; the vector buffer is sized with the index message
           lengths, although only olengths_idxs[i]-2 scalars are received per message below */
7632:   buf_size_idxs = 0;
7633:   buf_size_vals = 0;
7634:   buf_size_idxs_is = 0;
7635:   buf_size_vecs = 0;
7636:   for (i=0;i<n_recvs;i++) {
7637:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7638:     buf_size_vals += (PetscInt)olengths_vals[i];
7639:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7640:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7641:   }
7642:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7643:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7644:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7645:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7647:   /* get new tags for clean communications */
7648:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7649:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7650:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7651:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7653:   /* allocate for requests */
7654:   PetscMalloc1(n_sends,&send_req_idxs);
7655:   PetscMalloc1(n_sends,&send_req_vals);
7656:   PetscMalloc1(n_sends,&send_req_idxs_is);
7657:   PetscMalloc1(n_sends,&send_req_vecs);
7658:   PetscMalloc1(n_recvs,&recv_req_idxs);
7659:   PetscMalloc1(n_recvs,&recv_req_vals);
7660:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7661:   PetscMalloc1(n_recvs,&recv_req_vecs);

7663:   /* communications */
        /* post all the receives first; the messages of the i-th sender are stored one after
           the other in the corresponding receive buffer */
7664:   ptr_idxs = recv_buffer_idxs;
7665:   ptr_vals = recv_buffer_vals;
7666:   ptr_idxs_is = recv_buffer_idxs_is;
7667:   ptr_vecs = recv_buffer_vecs;
7668:   for (i=0;i<n_recvs;i++) {
7669:     source_dest = onodes[i];
7670:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7671:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7672:     ptr_idxs += olengths_idxs[i];
7673:     ptr_vals += olengths_vals[i];
7674:     if (nis) {
7675:       source_dest = onodes_is[i];
7676:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7677:       ptr_idxs_is += olengths_idxs_is[i];
7678:     }
7679:     if (nvecs) {
            /* one scalar per index of the sender's local-to-global map (index message minus its 2-entry header) */
7680:       source_dest = onodes[i];
7681:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7682:       ptr_vecs += olengths_idxs[i]-2;
7683:     }
7684:   }
        /* the same send buffers are used for every destination */
7685:   for (i=0;i<n_sends;i++) {
7686:     PetscMPIIntCast(is_indices[i],&source_dest);
7687:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7688:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7689:     if (nis) {
7690:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7691:     }
7692:     if (nvecs) {
            /* the array is restored further below, once the sends have completed */
7693:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7694:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7695:     }
7696:   }
7697:   ISRestoreIndices(is_sends_internal,&is_indices);
7698:   ISDestroy(&is_sends_internal);

7700:   /* assemble new l2g map */
        /* first pass counts the received global indices, second pass concatenates them;
           sorting and removing duplicates then gives the global indices of the new subdomain */
7701:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7702:   ptr_idxs = recv_buffer_idxs;
7703:   new_local_rows = 0;
7704:   for (i=0;i<n_recvs;i++) {
7705:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7706:     ptr_idxs += olengths_idxs[i];
7707:   }
7708:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7709:   ptr_idxs = recv_buffer_idxs;
7710:   new_local_rows = 0;
7711:   for (i=0;i<n_recvs;i++) {
7712:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7713:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7714:     ptr_idxs += olengths_idxs[i];
7715:   }
7716:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7717:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7718:   PetscFree(l2gmap_indices);

7720:   /* infer new local matrix type from received local matrices type */
7721:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7722:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7723:   if (n_recvs) {
          /* start from the type tag of this process and fall back to AIJ as soon as a
             received tag differs from it */
7724:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7725:     ptr_idxs = recv_buffer_idxs;
7726:     for (i=0;i<n_recvs;i++) {
7727:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7728:         new_local_type_private = MATAIJ_PRIVATE;
7729:         break;
7730:       }
7731:       ptr_idxs += olengths_idxs[i];
7732:     }
7733:     switch (new_local_type_private) {
7734:       case MATDENSE_PRIVATE:
              /* dense local matrices are subassembled into a SEQAIJ local matrix */
7735:         new_local_type = MATSEQAIJ;
7736:         bs = 1;
7737:         break;
7738:       case MATAIJ_PRIVATE:
7739:         new_local_type = MATSEQAIJ;
7740:         bs = 1;
7741:         break;
7742:       case MATBAIJ_PRIVATE:
7743:         new_local_type = MATSEQBAIJ;
7744:         break;
7745:       case MATSBAIJ_PRIVATE:
7746:         new_local_type = MATSEQSBAIJ;
7747:         break;
7748:       default:
7749:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7750:         break;
7751:     }
7752:   } else { /* by default, new_local_type is seqaij */
7753:     new_local_type = MATSEQAIJ;
7754:     bs = 1;
7755:   }

7757:   /* create MATIS object if needed */
7758:   if (!reuse) {
7759:     MatGetSize(mat,&rows,&cols);
7760:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7761:   } else {
7762:     /* it also destroys the local matrices */
7763:     if (*mat_n) {
7764:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7765:     } else { /* this is a fake object */
            /* NOTE: rows and cols still hold the sizes of the local matrix of mat here,
               since MatGetSize(mat,...) is called in the !reuse branch only */
7766:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7767:     }
7768:   }
7769:   MatISGetLocalMat(*mat_n,&local_mat);
7770:   MatSetType(local_mat,new_local_type);

7772:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7774:   /* Global to local map of received indices */
7775:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7776:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7777:   ISLocalToGlobalMappingDestroy(&l2gmap);

7779:   /* restore attributes -> type of incoming data and its size */
        /* the whole buffer, including the 2-entry header of each message, went through the
           mapping above: copy the headers back from the original buffer */
7780:   buf_size_idxs = 0;
7781:   for (i=0;i<n_recvs;i++) {
7782:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7783:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7784:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7785:   }
7786:   PetscFree(recv_buffer_idxs);

7788:   /* set preallocation */
7789:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7790:   if (!newisdense) {
7791:     PetscInt *new_local_nnz=0;

7793:     ptr_idxs = recv_buffer_idxs_local;
7794:     if (n_recvs) {
7795:       PetscCalloc1(new_local_rows,&new_local_nnz);
7796:     }
          /* a received dense block of size m adds m nonzeros to each of the local rows it touches */
7797:     for (i=0;i<n_recvs;i++) {
7798:       PetscInt j;
7799:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7800:         for (j=0;j<*(ptr_idxs+1);j++) {
7801:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7802:         }
7803:       } else {
7804:         /* TODO */
7805:       }
7806:       ptr_idxs += olengths_idxs[i];
7807:     }
7808:     if (new_local_nnz) {
            /* a row cannot hold more than new_local_rows entries; the three preallocation
               routines are called in sequence, with the counts adapted for each format
               (presumably only the one matching the type of local_mat has an effect - confirm) */
7809:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7810:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7811:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7812:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7813:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7814:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7815:     } else {
7816:       MatSetUp(local_mat);
7817:     }
7818:     PetscFree(new_local_nnz);
7819:   } else {
7820:     MatSetUp(local_mat);
7821:   }

7823:   /* set values */
        /* each received block is added using its local indices for both rows and columns;
           MAT_ROW_ORIENTED is switched off for the insertion and back on afterwards */
7824:   ptr_vals = recv_buffer_vals;
7825:   ptr_idxs = recv_buffer_idxs_local;
7826:   for (i=0;i<n_recvs;i++) {
7827:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7828:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7829:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7830:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7831:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7832:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7833:     } else {
7834:       /* TODO */
7835:     }
7836:     ptr_idxs += olengths_idxs[i];
7837:     ptr_vals += olengths_vals[i];
7838:   }
7839:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7840:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7841:   MatISRestoreLocalMat(*mat_n,&local_mat);
7842:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7843:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7844:   PetscFree(recv_buffer_vals);

7846: #if 0
7847:   if (!restrict_comm) { /* check */
7848:     Vec       lvec,rvec;
7849:     PetscReal infty_error;

7851:     MatCreateVecs(mat,&rvec,&lvec);
7852:     VecSetRandom(rvec,NULL);
7853:     MatMult(mat,rvec,lvec);
7854:     VecScale(lvec,-1.0);
7855:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7856:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7857:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7858:     VecDestroy(&rvec);
7859:     VecDestroy(&lvec);
7860:   }
7861: #endif

7863:   /* assemble new additional is (if any) */
7864:   if (nis) {
7865:     PetscInt **temp_idxs,*count_is,j,psum;

7867:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
          /* first pass: count, for each IS, the total number of indices received */
7868:     PetscCalloc1(nis,&count_is);
7869:     ptr_idxs = recv_buffer_idxs_is;
7870:     psum = 0;
7871:     for (i=0;i<n_recvs;i++) {
7872:       for (j=0;j<nis;j++) {
7873:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7874:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7875:         psum += plen;
7876:         ptr_idxs += plen+1; /* shift pointer to received data */
7877:       }
7878:     }
          /* temp_idxs[j] points inside a single allocation, at the start of the chunk of the j-th IS */
7879:     PetscMalloc1(nis,&temp_idxs);
7880:     PetscMalloc1(psum,&temp_idxs[0]);
7881:     for (i=1;i<nis;i++) {
7882:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7883:     }
          /* second pass: copy the received indices in the chunk of the IS they belong to */
7884:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7885:     ptr_idxs = recv_buffer_idxs_is;
7886:     for (i=0;i<n_recvs;i++) {
7887:       for (j=0;j<nis;j++) {
7888:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7889:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7890:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7891:         ptr_idxs += plen+1; /* shift pointer to received data */
7892:       }
7893:     }
          /* replace each input IS with the sorted, duplicate-free set of received indices */
7894:     for (i=0;i<nis;i++) {
7895:       ISDestroy(&isarray[i]);
7896:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7897:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7898:     }
7899:     PetscFree(count_is);
7900:     PetscFree(temp_idxs[0]);
7901:     PetscFree(temp_idxs);
7902:   }
7903:   /* free workspace */
        /* every send buffer is released only after the corresponding sends have completed */
7904:   PetscFree(recv_buffer_idxs_is);
7905:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7906:   PetscFree(send_buffer_idxs);
7907:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7908:   if (isdense) {
7909:     MatISGetLocalMat(mat,&local_mat);
7910:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7911:     MatISRestoreLocalMat(mat,&local_mat);
7912:   } else {
7913:     /* PetscFree(send_buffer_vals); */
7914:   }
7915:   if (nis) {
7916:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7917:     PetscFree(send_buffer_idxs_is);
7918:   }

7920:   if (nvecs) {
7921:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7922:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7923:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
          /* the input vector is replaced by a new one with new_local_rows local entries */
7924:     VecDestroy(&nnsp_vec[0]);
7925:     VecCreate(comm_n,&nnsp_vec[0]);
7926:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7927:     VecSetType(nnsp_vec[0],VECSTANDARD);
7928:     /* set values */
          /* send_buffer_vecs is reused here as the array of the new vector; received values
             are summed at the local positions stored in recv_buffer_idxs_local */
7929:     ptr_vals = recv_buffer_vecs;
7930:     ptr_idxs = recv_buffer_idxs_local;
7931:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7932:     for (i=0;i<n_recvs;i++) {
7933:       PetscInt j;
7934:       for (j=0;j<*(ptr_idxs+1);j++) {
7935:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7936:       }
7937:       ptr_idxs += olengths_idxs[i];
7938:       ptr_vals += olengths_idxs[i]-2;
7939:     }
7940:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7941:     VecAssemblyBegin(nnsp_vec[0]);
7942:     VecAssemblyEnd(nnsp_vec[0]);
7943:   }

7945:   PetscFree(recv_buffer_vecs);
7946:   PetscFree(recv_buffer_idxs_local);
7947:   PetscFree(recv_req_idxs);
7948:   PetscFree(recv_req_vals);
7949:   PetscFree(recv_req_vecs);
7950:   PetscFree(recv_req_idxs_is);
7951:   PetscFree(send_req_idxs);
7952:   PetscFree(send_req_vals);
7953:   PetscFree(send_req_vecs);
7954:   PetscFree(send_req_idxs_is);
7955:   PetscFree(ilengths_vals);
7956:   PetscFree(ilengths_idxs);
7957:   PetscFree(olengths_vals);
7958:   PetscFree(olengths_idxs);
7959:   PetscFree(onodes);
7960:   if (nis) {
7961:     PetscFree(ilengths_idxs_is);
7962:     PetscFree(olengths_idxs_is);
7963:     PetscFree(onodes_is);
7964:   }
7965:   PetscSubcommDestroy(&subcomm);
7966:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7967:     MatDestroy(mat_n);
7968:     for (i=0;i<nis;i++) {
7969:       ISDestroy(&isarray[i]);
7970:     }
7971:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7972:       VecDestroy(&nnsp_vec[0]);
7973:     }
7974:     *mat_n = NULL;
7975:   }
7976:   return(0);
7977: }

7979: /* temporary hack into ksp private data structure */
7980:  #include <petsc/private/kspimpl.h>

7982: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7983: {
7984:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7985:   PC_IS                  *pcis = (PC_IS*)pc->data;
7986:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7987:   Mat                    coarsedivudotp = NULL;
7988:   Mat                    coarseG,t_coarse_mat_is;
7989:   MatNullSpace           CoarseNullSpace = NULL;
7990:   ISLocalToGlobalMapping coarse_islg;
7991:   IS                     coarse_is,*isarray,corners;
7992:   PetscInt               i,im_active=-1,active_procs=-1;
7993:   PetscInt               nis,nisdofs,nisneu,nisvert;
7994:   PetscInt               coarse_eqs_per_proc;
7995:   PC                     pc_temp;
7996:   PCType                 coarse_pc_type;
7997:   KSPType                coarse_ksp_type;
7998:   PetscBool              multilevel_requested,multilevel_allowed;
7999:   PetscBool              coarse_reuse;
8000:   PetscInt               ncoarse,nedcfield;
8001:   PetscBool              compute_vecs = PETSC_FALSE;
8002:   PetscScalar            *array;
8003:   MatReuse               coarse_mat_reuse;
8004:   PetscBool              restr, full_restr, have_void;
8005:   PetscMPIInt            size;
8006:   PetscErrorCode         ierr;

8009:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8010:   /* Assign global numbering to coarse dofs */
8011:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8012:     PetscInt ocoarse_size;
8013:     compute_vecs = PETSC_TRUE;

8015:     pcbddc->new_primal_space = PETSC_TRUE;
8016:     ocoarse_size = pcbddc->coarse_size;
8017:     PetscFree(pcbddc->global_primal_indices);
8018:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8019:     /* see if we can avoid some work */
8020:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8021:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8022:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8023:         KSPReset(pcbddc->coarse_ksp);
8024:         coarse_reuse = PETSC_FALSE;
8025:       } else { /* we can safely reuse already computed coarse matrix */
8026:         coarse_reuse = PETSC_TRUE;
8027:       }
8028:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8029:       coarse_reuse = PETSC_FALSE;
8030:     }
8031:     /* reset any subassembling information */
8032:     if (!coarse_reuse || pcbddc->recompute_topography) {
8033:       ISDestroy(&pcbddc->coarse_subassembling);
8034:     }
8035:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8036:     coarse_reuse = PETSC_TRUE;
8037:   }
8038:   if (coarse_reuse && pcbddc->coarse_ksp) {
8039:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8040:     PetscObjectReference((PetscObject)coarse_mat);
8041:     coarse_mat_reuse = MAT_REUSE_MATRIX;
8042:   } else {
8043:     coarse_mat = NULL;
8044:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8045:   }

8047:   /* creates temporary l2gmap and IS for coarse indexes */
8048:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8049:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

8051:   /* creates temporary MATIS object for coarse matrix */
8052:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8053:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8054:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8055:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8056:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8057:   MatDestroy(&coarse_submat_dense);

8059:   /* count "active" (i.e. with positive local size) and "void" processes */
8060:   im_active = !!(pcis->n);
8061:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

8063:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8064:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8065:   /* full_restr : just use the receivers from the subassembling pattern */
8066:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8067:   coarse_mat_is        = NULL;
8068:   multilevel_allowed   = PETSC_FALSE;
8069:   multilevel_requested = PETSC_FALSE;
8070:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8071:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8072:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8073:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8074:   if (multilevel_requested) {
8075:     ncoarse    = active_procs/pcbddc->coarsening_ratio;
8076:     restr      = PETSC_FALSE;
8077:     full_restr = PETSC_FALSE;
8078:   } else {
8079:     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8080:     restr      = PETSC_TRUE;
8081:     full_restr = PETSC_TRUE;
8082:   }
8083:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8084:   ncoarse = PetscMax(1,ncoarse);
8085:   if (!pcbddc->coarse_subassembling) {
8086:     if (pcbddc->coarsening_ratio > 1) {
8087:       if (multilevel_requested) {
8088:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8089:       } else {
8090:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8091:       }
8092:     } else {
8093:       PetscMPIInt rank;
8094:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8095:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8096:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8097:     }
8098:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8099:     PetscInt    psum;
8100:     if (pcbddc->coarse_ksp) psum = 1;
8101:     else psum = 0;
8102:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8103:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8104:   }
8105:   /* determine if we can go multilevel */
8106:   if (multilevel_requested) {
8107:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8108:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8109:   }
8110:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

8112:   /* dump subassembling pattern */
8113:   if (pcbddc->dbg_flag && multilevel_allowed) {
8114:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8115:   }
8116:   /* compute dofs splitting and neumann boundaries for coarse dofs */
8117:   nedcfield = -1;
8118:   corners = NULL;
8119:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneded computations */
8120:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
8121:     const PetscInt         *idxs;
8122:     ISLocalToGlobalMapping tmap;

8124:     /* create map between primal indices (in local representative ordering) and local primal numbering */
8125:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8126:     /* allocate space for temporary storage */
8127:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8128:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8129:     /* allocate for IS array */
8130:     nisdofs = pcbddc->n_ISForDofsLocal;
8131:     if (pcbddc->nedclocal) {
8132:       if (pcbddc->nedfield > -1) {
8133:         nedcfield = pcbddc->nedfield;
8134:       } else {
8135:         nedcfield = 0;
8136:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8137:         nisdofs = 1;
8138:       }
8139:     }
8140:     nisneu = !!pcbddc->NeumannBoundariesLocal;
8141:     nisvert = 0; /* nisvert is not used */
8142:     nis = nisdofs + nisneu + nisvert;
8143:     PetscMalloc1(nis,&isarray);
8144:     /* dofs splitting */
8145:     for (i=0;i<nisdofs;i++) {
8146:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
8147:       if (nedcfield != i) {
8148:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8149:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8150:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8151:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8152:       } else {
8153:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8154:         ISGetIndices(pcbddc->nedclocal,&idxs);
8155:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8156:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8157:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8158:       }
8159:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8160:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8161:       /* ISView(isarray[i],0); */
8162:     }
8163:     /* neumann boundaries */
8164:     if (pcbddc->NeumannBoundariesLocal) {
8165:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8166:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8167:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8168:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8169:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8170:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8171:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8172:       /* ISView(isarray[nisdofs],0); */
8173:     }
8174:     /* coordinates */
8175:     if (pcbddc->corner_selected) {
8176:       PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8177:       ISGetLocalSize(corners,&tsize);
8178:       ISGetIndices(corners,&idxs);
8179:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8180:       if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8181:       ISRestoreIndices(corners,&idxs);
8182:       PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8183:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8184:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8185:     }
8186:     PetscFree(tidxs);
8187:     PetscFree(tidxs2);
8188:     ISLocalToGlobalMappingDestroy(&tmap);
8189:   } else {
8190:     nis = 0;
8191:     nisdofs = 0;
8192:     nisneu = 0;
8193:     nisvert = 0;
8194:     isarray = NULL;
8195:   }
8196:   /* destroy no longer needed map */
8197:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8199:   /* subassemble */
8200:   if (multilevel_allowed) {
8201:     Vec       vp[1];
8202:     PetscInt  nvecs = 0;
8203:     PetscBool reuse,reuser;

8205:     if (coarse_mat) reuse = PETSC_TRUE;
8206:     else reuse = PETSC_FALSE;
8207:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8208:     vp[0] = NULL;
8209:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8210:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8211:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8212:       VecSetType(vp[0],VECSTANDARD);
8213:       nvecs = 1;

8215:       if (pcbddc->divudotp) {
8216:         Mat      B,loc_divudotp;
8217:         Vec      v,p;
8218:         IS       dummy;
8219:         PetscInt np;

8221:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8222:         MatGetSize(loc_divudotp,&np,NULL);
8223:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8224:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8225:         MatCreateVecs(B,&v,&p);
8226:         VecSet(p,1.);
8227:         MatMultTranspose(B,p,v);
8228:         VecDestroy(&p);
8229:         MatDestroy(&B);
8230:         VecGetArray(vp[0],&array);
8231:         VecPlaceArray(pcbddc->vec1_P,array);
8232:         VecRestoreArray(vp[0],&array);
8233:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8234:         VecResetArray(pcbddc->vec1_P);
8235:         ISDestroy(&dummy);
8236:         VecDestroy(&v);
8237:       }
8238:     }
8239:     if (reuser) {
8240:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8241:     } else {
8242:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8243:     }
8244:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8245:       PetscScalar *arraym,*arrayv;
8246:       PetscInt    nl;
8247:       VecGetLocalSize(vp[0],&nl);
8248:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8249:       MatDenseGetArray(coarsedivudotp,&arraym);
8250:       VecGetArray(vp[0],&arrayv);
8251:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8252:       VecRestoreArray(vp[0],&arrayv);
8253:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8254:       VecDestroy(&vp[0]);
8255:     } else {
8256:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8257:     }
8258:   } else {
8259:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8260:   }
8261:   if (coarse_mat_is || coarse_mat) {
8262:     if (!multilevel_allowed) {
8263:       MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8264:     } else {
8265:       Mat A;

8267:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8268:       if (coarse_mat_is) {
8269:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8270:         PetscObjectReference((PetscObject)coarse_mat_is);
8271:         coarse_mat = coarse_mat_is;
8272:       }
8273:       /* be sure we don't have MatSeqDENSE as local mat */
8274:       MatISGetLocalMat(coarse_mat,&A);
8275:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8276:     }
8277:   }
8278:   MatDestroy(&t_coarse_mat_is);
8279:   MatDestroy(&coarse_mat_is);

8281:   /* create local to global scatters for coarse problem */
8282:   if (compute_vecs) {
8283:     PetscInt lrows;
8284:     VecDestroy(&pcbddc->coarse_vec);
8285:     if (coarse_mat) {
8286:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8287:     } else {
8288:       lrows = 0;
8289:     }
8290:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8291:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8292:     VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8293:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8294:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8295:   }
8296:   ISDestroy(&coarse_is);

8298:   /* set defaults for coarse KSP and PC */
8299:   if (multilevel_allowed) {
8300:     coarse_ksp_type = KSPRICHARDSON;
8301:     coarse_pc_type  = PCBDDC;
8302:   } else {
8303:     coarse_ksp_type = KSPPREONLY;
8304:     coarse_pc_type  = PCREDUNDANT;
8305:   }

8307:   /* print some info if requested */
8308:   if (pcbddc->dbg_flag) {
8309:     if (!multilevel_allowed) {
8310:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8311:       if (multilevel_requested) {
8312:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8313:       } else if (pcbddc->max_levels) {
8314:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8315:       }
8316:       PetscViewerFlush(pcbddc->dbg_viewer);
8317:     }
8318:   }

8320:   /* communicate coarse discrete gradient */
8321:   coarseG = NULL;
8322:   if (pcbddc->nedcG && multilevel_allowed) {
8323:     MPI_Comm ccomm;
8324:     if (coarse_mat) {
8325:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8326:     } else {
8327:       ccomm = MPI_COMM_NULL;
8328:     }
8329:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8330:   }

8332:   /* create the coarse KSP object only once with defaults */
8333:   if (coarse_mat) {
8334:     PetscBool   isredundant,isnn,isbddc;
8335:     PetscViewer dbg_viewer = NULL;

8337:     if (pcbddc->dbg_flag) {
8338:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8339:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8340:     }
8341:     if (!pcbddc->coarse_ksp) {
8342:       char   prefix[256],str_level[16];
8343:       size_t len;

8345:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8346:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8347:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8348:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8349:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8350:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8351:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8352:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8353:       /* TODO is this logic correct? should check for coarse_mat type */
8354:       PCSetType(pc_temp,coarse_pc_type);
8355:       /* prefix */
8356:       PetscStrcpy(prefix,"");
8357:       PetscStrcpy(str_level,"");
8358:       if (!pcbddc->current_level) {
8359:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8360:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8361:       } else {
8362:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8363:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8364:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8365:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8366:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8367:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8368:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8369:       }
8370:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8371:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8372:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8373:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8374:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8375:       /* allow user customization */
8376:       KSPSetFromOptions(pcbddc->coarse_ksp);
8377:       /* get some info after set from options */
8378:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8379:       /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8380:       PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8381:       PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8382:       if (multilevel_allowed && !isbddc && !isnn) {
8383:         isbddc = PETSC_TRUE;
8384:         PCSetType(pc_temp,PCBDDC);
8385:         PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8386:         PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8387:         PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8388:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8389:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8390:           (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8391:           PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8392:           PetscOptionsEnd();
8393:           pc_temp->setfromoptionscalled++;
8394:         }
8395:       }
8396:     }
8397:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8398:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8399:     if (nisdofs) {
8400:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8401:       for (i=0;i<nisdofs;i++) {
8402:         ISDestroy(&isarray[i]);
8403:       }
8404:     }
8405:     if (nisneu) {
8406:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8407:       ISDestroy(&isarray[nisdofs]);
8408:     }
8409:     if (nisvert) {
8410:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8411:       ISDestroy(&isarray[nis-1]);
8412:     }
8413:     if (coarseG) {
8414:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8415:     }

8417:     /* get some info after set from options */
8418:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);

8420:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8421:     if (isbddc && !multilevel_allowed) {
8422:       PCSetType(pc_temp,coarse_pc_type);
8423:       isbddc = PETSC_FALSE;
8424:     }
8425:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8426:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8427:     if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8428:       PCSetType(pc_temp,PCBDDC);
8429:       isbddc = PETSC_TRUE;
8430:     }
8431:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8432:     if (isredundant) {
8433:       KSP inner_ksp;
8434:       PC  inner_pc;

8436:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8437:       KSPGetPC(inner_ksp,&inner_pc);
8438:     }

8440:     /* parameters which miss an API */
8441:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8442:     if (isbddc) {
8443:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8445:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8446:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8447:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8448:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8449:       if (pcbddc_coarse->benign_saddle_point) {
8450:         Mat                    coarsedivudotp_is;
8451:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8452:         IS                     row,col;
8453:         const PetscInt         *gidxs;
8454:         PetscInt               n,st,M,N;

8456:         MatGetSize(coarsedivudotp,&n,NULL);
8457:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8458:         st   = st-n;
8459:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8460:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8461:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8462:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8463:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8464:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8465:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8466:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8467:         ISGetSize(row,&M);
8468:         MatGetSize(coarse_mat,&N,NULL);
8469:         ISDestroy(&row);
8470:         ISDestroy(&col);
8471:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8472:         MatSetType(coarsedivudotp_is,MATIS);
8473:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8474:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8475:         ISLocalToGlobalMappingDestroy(&rl2g);
8476:         ISLocalToGlobalMappingDestroy(&cl2g);
8477:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8478:         MatDestroy(&coarsedivudotp);
8479:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8480:         MatDestroy(&coarsedivudotp_is);
8481:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8482:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8483:       }
8484:     }

8486:     /* propagate symmetry info of coarse matrix */
8487:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8488:     if (pc->pmat->symmetric_set) {
8489:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8490:     }
8491:     if (pc->pmat->hermitian_set) {
8492:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8493:     }
8494:     if (pc->pmat->spd_set) {
8495:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8496:     }
8497:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8498:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8499:     }
8500:     /* set operators */
8501:     MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8502:     MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8503:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8504:     if (pcbddc->dbg_flag) {
8505:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8506:     }
8507:   }
8508:   MatDestroy(&coarseG);
8509:   PetscFree(isarray);
8510: #if 0
8511:   {
8512:     PetscViewer viewer;
8513:     char filename[256];
8514:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8515:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8516:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8517:     MatView(coarse_mat,viewer);
8518:     PetscViewerPopFormat(viewer);
8519:     PetscViewerDestroy(&viewer);
8520:   }
8521: #endif

8523:   if (corners) {
8524:     Vec            gv;
8525:     IS             is;
8526:     const PetscInt *idxs;
8527:     PetscInt       i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8528:     PetscScalar    *coords;

8530:     if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8531:     VecGetSize(pcbddc->coarse_vec,&N);
8532:     VecGetLocalSize(pcbddc->coarse_vec,&n);
8533:     VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8534:     VecSetBlockSize(gv,cdim);
8535:     VecSetSizes(gv,n*cdim,N*cdim);
8536:     VecSetType(gv,VECSTANDARD);
8537:     VecSetFromOptions(gv);
8538:     VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */

8540:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8541:     ISGetLocalSize(is,&n);
8542:     ISGetIndices(is,&idxs);
8543:     PetscMalloc1(n*cdim,&coords);
8544:     for (i=0;i<n;i++) {
8545:       for (d=0;d<cdim;d++) {
8546:         coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8547:       }
8548:     }
8549:     ISRestoreIndices(is,&idxs);
8550:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);

8552:     ISGetLocalSize(corners,&n);
8553:     ISGetIndices(corners,&idxs);
8554:     VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8555:     ISRestoreIndices(corners,&idxs);
8556:     PetscFree(coords);
8557:     VecAssemblyBegin(gv);
8558:     VecAssemblyEnd(gv);
8559:     VecGetArray(gv,&coords);
8560:     if (pcbddc->coarse_ksp) {
8561:       PC        coarse_pc;
8562:       PetscBool isbddc;

8564:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8565:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8566:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8567:         PetscReal *realcoords;

8569:         VecGetLocalSize(gv,&n);
8570: #if defined(PETSC_USE_COMPLEX)
8571:         PetscMalloc1(n,&realcoords);
8572:         for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8573: #else
8574:         realcoords = coords;
8575: #endif
8576:         PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8577: #if defined(PETSC_USE_COMPLEX)
8578:         PetscFree(realcoords);
8579: #endif
8580:       }
8581:     }
8582:     VecRestoreArray(gv,&coords);
8583:     VecDestroy(&gv);
8584:   }
8585:   ISDestroy(&corners);

8587:   if (pcbddc->coarse_ksp) {
8588:     Vec crhs,csol;

8590:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8591:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8592:     if (!csol) {
8593:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8594:     }
8595:     if (!crhs) {
8596:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8597:     }
8598:   }
8599:   MatDestroy(&coarsedivudotp);

8601:   /* compute null space for coarse solver if the benign trick has been requested */
8602:   if (pcbddc->benign_null) {

8604:     VecSet(pcbddc->vec1_P,0.);
8605:     for (i=0;i<pcbddc->benign_n;i++) {
8606:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8607:     }
8608:     VecAssemblyBegin(pcbddc->vec1_P);
8609:     VecAssemblyEnd(pcbddc->vec1_P);
8610:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8611:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8612:     if (coarse_mat) {
8613:       Vec         nullv;
8614:       PetscScalar *array,*array2;
8615:       PetscInt    nl;

8617:       MatCreateVecs(coarse_mat,&nullv,NULL);
8618:       VecGetLocalSize(nullv,&nl);
8619:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8620:       VecGetArray(nullv,&array2);
8621:       PetscMemcpy(array2,array,nl*sizeof(*array));
8622:       VecRestoreArray(nullv,&array2);
8623:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8624:       VecNormalize(nullv,NULL);
8625:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8626:       VecDestroy(&nullv);
8627:     }
8628:   }
8629:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);

8631:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8632:   if (pcbddc->coarse_ksp) {
8633:     PetscBool ispreonly;

8635:     if (CoarseNullSpace) {
8636:       PetscBool isnull;
8637:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8638:       if (isnull) {
8639:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8640:       }
8641:       /* TODO: add local nullspaces (if any) */
8642:     }
8643:     /* setup coarse ksp */
8644:     KSPSetUp(pcbddc->coarse_ksp);
8645:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8646:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8647:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8648:       KSP       check_ksp;
8649:       KSPType   check_ksp_type;
8650:       PC        check_pc;
8651:       Vec       check_vec,coarse_vec;
8652:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8653:       PetscInt  its;
8654:       PetscBool compute_eigs;
8655:       PetscReal *eigs_r,*eigs_c;
8656:       PetscInt  neigs;
8657:       const char *prefix;

8659:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8660:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8661:       PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8662:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8663:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8664:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8665:       /* prevent from setup unneeded object */
8666:       KSPGetPC(check_ksp,&check_pc);
8667:       PCSetType(check_pc,PCNONE);
8668:       if (ispreonly) {
8669:         check_ksp_type = KSPPREONLY;
8670:         compute_eigs = PETSC_FALSE;
8671:       } else {
8672:         check_ksp_type = KSPGMRES;
8673:         compute_eigs = PETSC_TRUE;
8674:       }
8675:       KSPSetType(check_ksp,check_ksp_type);
8676:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8677:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8678:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8679:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8680:       KSPSetOptionsPrefix(check_ksp,prefix);
8681:       KSPAppendOptionsPrefix(check_ksp,"check_");
8682:       KSPSetFromOptions(check_ksp);
8683:       KSPSetUp(check_ksp);
8684:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8685:       KSPSetPC(check_ksp,check_pc);
8686:       /* create random vec */
8687:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8688:       VecSetRandom(check_vec,NULL);
8689:       MatMult(coarse_mat,check_vec,coarse_vec);
8690:       /* solve coarse problem */
8691:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8692:       KSPCheckSolve(check_ksp,pc,coarse_vec);
8693:       /* set eigenvalue estimation if preonly has not been requested */
8694:       if (compute_eigs) {
8695:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8696:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8697:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8698:         if (neigs) {
8699:           lambda_max = eigs_r[neigs-1];
8700:           lambda_min = eigs_r[0];
8701:           if (pcbddc->use_coarse_estimates) {
8702:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8703:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8704:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8705:             }
8706:           }
8707:         }
8708:       }

8710:       /* check coarse problem residual error */
8711:       if (pcbddc->dbg_flag) {
8712:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8713:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8714:         VecAXPY(check_vec,-1.0,coarse_vec);
8715:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8716:         MatMult(coarse_mat,check_vec,coarse_vec);
8717:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8718:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8719:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8720:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8721:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8722:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8723:         if (CoarseNullSpace) {
8724:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8725:         }
8726:         if (compute_eigs) {
8727:           PetscReal          lambda_max_s,lambda_min_s;
8728:           KSPConvergedReason reason;
8729:           KSPGetType(check_ksp,&check_ksp_type);
8730:           KSPGetIterationNumber(check_ksp,&its);
8731:           KSPGetConvergedReason(check_ksp,&reason);
8732:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8733:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8734:           for (i=0;i<neigs;i++) {
8735:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8736:           }
8737:         }
8738:         PetscViewerFlush(dbg_viewer);
8739:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8740:       }
8741:       VecDestroy(&check_vec);
8742:       VecDestroy(&coarse_vec);
8743:       KSPDestroy(&check_ksp);
8744:       if (compute_eigs) {
8745:         PetscFree(eigs_r);
8746:         PetscFree(eigs_c);
8747:       }
8748:     }
8749:   }
8750:   MatNullSpaceDestroy(&CoarseNullSpace);
8751:   /* print additional info */
8752:   if (pcbddc->dbg_flag) {
8753:     /* waits until all processes reaches this point */
8754:     PetscBarrier((PetscObject)pc);
8755:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8756:     PetscViewerFlush(pcbddc->dbg_viewer);
8757:   }

8759:   /* free memory */
8760:   MatDestroy(&coarse_mat);
8761:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8762:   return(0);
8763: }

8765: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8766: {
8767:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8768:   PC_IS*         pcis = (PC_IS*)pc->data;
8769:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8770:   IS             subset,subset_mult,subset_n;
8771:   PetscInt       local_size,coarse_size=0;
8772:   PetscInt       *local_primal_indices=NULL;
8773:   const PetscInt *t_local_primal_indices;

8777:   /* Compute global number of coarse dofs */
8778:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8779:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8780:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8781:   ISDestroy(&subset_n);
8782:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8783:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8784:   ISDestroy(&subset);
8785:   ISDestroy(&subset_mult);
8786:   ISGetLocalSize(subset_n,&local_size);
8787:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8788:   PetscMalloc1(local_size,&local_primal_indices);
8789:   ISGetIndices(subset_n,&t_local_primal_indices);
8790:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8791:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8792:   ISDestroy(&subset_n);

8794:   /* check numbering */
8795:   if (pcbddc->dbg_flag) {
8796:     PetscScalar coarsesum,*array,*array2;
8797:     PetscInt    i;
8798:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8800:     PetscViewerFlush(pcbddc->dbg_viewer);
8801:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8802:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8803:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8804:     /* counter */
8805:     VecSet(pcis->vec1_global,0.0);
8806:     VecSet(pcis->vec1_N,1.0);
8807:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8808:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8809:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8810:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8811:     VecSet(pcis->vec1_N,0.0);
8812:     for (i=0;i<pcbddc->local_primal_size;i++) {
8813:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8814:     }
8815:     VecAssemblyBegin(pcis->vec1_N);
8816:     VecAssemblyEnd(pcis->vec1_N);
8817:     VecSet(pcis->vec1_global,0.0);
8818:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8819:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8820:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8821:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8822:     VecGetArray(pcis->vec1_N,&array);
8823:     VecGetArray(pcis->vec2_N,&array2);
8824:     for (i=0;i<pcis->n;i++) {
8825:       if (array[i] != 0.0 && array[i] != array2[i]) {
8826:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8827:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8828:         set_error = PETSC_TRUE;
8829:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8830:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8831:       }
8832:     }
8833:     VecRestoreArray(pcis->vec2_N,&array2);
8834:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8835:     PetscViewerFlush(pcbddc->dbg_viewer);
8836:     for (i=0;i<pcis->n;i++) {
8837:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8838:     }
8839:     VecRestoreArray(pcis->vec1_N,&array);
8840:     VecSet(pcis->vec1_global,0.0);
8841:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8842:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8843:     VecSum(pcis->vec1_global,&coarsesum);
8844:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8845:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8846:       PetscInt *gidxs;

8848:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8849:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8850:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8851:       PetscViewerFlush(pcbddc->dbg_viewer);
8852:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8853:       for (i=0;i<pcbddc->local_primal_size;i++) {
8854:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8855:       }
8856:       PetscViewerFlush(pcbddc->dbg_viewer);
8857:       PetscFree(gidxs);
8858:     }
8859:     PetscViewerFlush(pcbddc->dbg_viewer);
8860:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8861:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8862:   }

8864:   /* get back data */
8865:   *coarse_size_n = coarse_size;
8866:   *local_primal_indices_n = local_primal_indices;
8867:   return(0);
8868: }

8870: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8871: {
8872:   IS             localis_t;
8873:   PetscInt       i,lsize,*idxs,n;
8874:   PetscScalar    *vals;

8878:   /* get indices in local ordering exploiting local to global map */
8879:   ISGetLocalSize(globalis,&lsize);
8880:   PetscMalloc1(lsize,&vals);
8881:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8882:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8883:   VecSet(gwork,0.0);
8884:   VecSet(lwork,0.0);
8885:   if (idxs) { /* multilevel guard */
8886:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8887:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8888:   }
8889:   VecAssemblyBegin(gwork);
8890:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8891:   PetscFree(vals);
8892:   VecAssemblyEnd(gwork);
8893:   /* now compute set in local ordering */
8894:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8895:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8896:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8897:   VecGetSize(lwork,&n);
8898:   for (i=0,lsize=0;i<n;i++) {
8899:     if (PetscRealPart(vals[i]) > 0.5) {
8900:       lsize++;
8901:     }
8902:   }
8903:   PetscMalloc1(lsize,&idxs);
8904:   for (i=0,lsize=0;i<n;i++) {
8905:     if (PetscRealPart(vals[i]) > 0.5) {
8906:       idxs[lsize++] = i;
8907:     }
8908:   }
8909:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8910:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8911:   *localis = localis_t;
8912:   return(0);
8913: }

/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp().

   What the routine does, in order:
   - selects the adjacency (xadj/adjncy) passed to PCBDDCSubSchursSetUp(): none when
     pcbddc->sub_schurs_layers is -1, the arrays stored in pcbddc->mat_graph when available,
     otherwise a private copy of the row structure of pcbddc->local_mat obtained with MatGetRowIJ()
     (if that fails, sub_schurs_layers is reset to -1);
   - creates the Schur complement S_j from the pcis blocks A_II, pA_II, A_IB, A_BI, A_BB;
   - if sub_schurs->schur_explicit is false, attaches pcbddc->ksp_D to S_j and sets up sub_schurs
     without a local matrix;
   - otherwise converts pcbddc->local_mat to MATSEQAIJ if needed, optionally computes a change of
     basis through a temporary PCBDDC object, and sets up sub_schurs with the local matrix (or with
     a modified copy when -sub_schurs_discrete_harmonic is given).

   NOTE(review): ierr is declared below but never assigned in this listing; the return codes of the
   calls are not checked here.
*/
8915: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8916: {
8917:   PC_IS               *pcis=(PC_IS*)pc->data;
8918:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8919:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8920:   Mat                 S_j;
8921:   PetscInt            *used_xadj,*used_adjncy;
8922:   PetscBool           free_used_adj;
8923:   PetscErrorCode      ierr;

8926:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8927:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8928:   free_used_adj = PETSC_FALSE;
8929:   if (pcbddc->sub_schurs_layers == -1) {
8930:     used_xadj = NULL;
8931:     used_adjncy = NULL;
8932:   } else {
        /* the first two branches borrow the arrays stored in mat_graph (nothing to free afterwards) */
8933:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8934:       used_xadj = pcbddc->mat_graph->xadj;
8935:       used_adjncy = pcbddc->mat_graph->adjncy;
8936:     } else if (pcbddc->computed_rowadj) {
8937:       used_xadj = pcbddc->mat_graph->xadj;
8938:       used_adjncy = pcbddc->mat_graph->adjncy;
8939:     } else {
8940:       PetscBool      flg_row=PETSC_FALSE;
8941:       const PetscInt *xadj,*adjncy;
8942:       PetscInt       nvtxs;

          /* copy the row structure of the local matrix: the arrays are given back by MatRestoreRowIJ() below,
             so a private copy (released at the end of the routine) is needed */
8944:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8945:       if (flg_row) {
8946:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8947:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8948:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8949:         free_used_adj = PETSC_TRUE;
8950:       } else {
            /* row structure not available: fall back to the "no adjacency" case */
8951:         pcbddc->sub_schurs_layers = -1;
8952:         used_xadj = NULL;
8953:         used_adjncy = NULL;
8954:       }
8955:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8956:     }
8957:   }

8959:   /* setup sub_schurs data */
8960:   MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8961:   if (!sub_schurs->schur_explicit) {
8962:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8963:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8964:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8965:   } else {
8966:     Mat       change = NULL;
8967:     Vec       scaling = NULL;
8968:     IS        change_primal = NULL, iP;
8969:     PetscInt  benign_n;
8970:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8971:     PetscBool isseqaij,need_change = PETSC_FALSE;
8972:     PetscBool discrete_harmonic = PETSC_FALSE;

        /* when vertices are not used, solvers are reused only if sub_schurs has no vertices at all */
8974:     if (!pcbddc->use_vertices && reuse_solvers) {
8975:       PetscInt n_vertices;

8977:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8978:       reuse_solvers = (PetscBool)!n_vertices;
8979:     }
        /* make sure pcbddc->local_mat is a MATSEQAIJ matrix */
8980:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8981:     if (!isseqaij) {
8982:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
          /* if local_mat is the very same object as the MATIS local matrix, convert into a new matrix
             instead of converting in place, so that matis->A itself is left untouched */
8983:       if (matis->A == pcbddc->local_mat) {
8984:         MatDestroy(&pcbddc->local_mat);
8985:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8986:       } else {
8987:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8988:       }
8989:     }
8990:     if (!pcbddc->benign_change_explicit) {
8991:       benign_n = pcbddc->benign_n;
8992:     } else {
8993:       benign_n = 0;
8994:     }
8995:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8996:        We need a global reduction to avoid possible deadlocks.
8997:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8998:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8999:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9000:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
          /* the change is needed only if no process already holds one */
9001:       need_change = (PetscBool)(!need_change);
9002:     }
9003:     /* If the user defines additional constraints, we import them here.
9004:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9005:     if (need_change) {
9006:       PC_IS   *pcisf;
9007:       PC_BDDC *pcbddcf;
9008:       PC      pcf;

9010:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
          /* temporary PCBDDC object sharing the operators of pc */
9011:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9012:       PCSetOperators(pcf,pc->mat,pc->pmat);
9013:       PCSetType(pcf,PCBDDC);

9015:       /* hacks */
          /* pcf borrows (without taking references) the objects below from pc; its own mat_graph
             structure is freed and replaced by the one of pc */
9016:       pcisf                        = (PC_IS*)pcf->data;
9017:       pcisf->is_B_local            = pcis->is_B_local;
9018:       pcisf->vec1_N                = pcis->vec1_N;
9019:       pcisf->BtoNmap               = pcis->BtoNmap;
9020:       pcisf->n                     = pcis->n;
9021:       pcisf->n_B                   = pcis->n_B;
9022:       pcbddcf                      = (PC_BDDC*)pcf->data;
9023:       PetscFree(pcbddcf->mat_graph);
9024:       pcbddcf->mat_graph           = pcbddc->mat_graph;
9025:       pcbddcf->use_faces           = PETSC_TRUE;
9026:       pcbddcf->use_change_of_basis = PETSC_TRUE;
9027:       pcbddcf->use_change_on_faces = PETSC_TRUE;
9028:       pcbddcf->use_qr_single       = PETSC_TRUE;
9029:       pcbddcf->fake_change         = PETSC_TRUE;

9031:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9032:       PCBDDCConstraintsSetUp(pcf);
9033:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9034:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
          /* steal the constraint matrix from pcf: it is destroyed by this routine through "change" */
9035:       change = pcbddcf->ConstraintMatrix;
9036:       pcbddcf->ConstraintMatrix = NULL;

9038:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9039:       PetscFree(pcbddcf->sub_schurs);
9040:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9041:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9042:       PetscFree(pcbddcf->primal_indices_local_idxs);
9043:       PetscFree(pcbddcf->onearnullvecs_state);
          /* the type-specific data is released by hand and the destroy/reset callbacks are cleared
             before PCDestroy(); presumably so that the objects borrowed from pc are not touched */
9044:       PetscFree(pcf->data);
9045:       pcf->ops->destroy = NULL;
9046:       pcf->ops->reset   = NULL;
9047:       PCDestroy(&pcf);
9048:     }
9049:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

        /* the discrete harmonic option is processed only if an object named "__KSPFETIDP_iP" is composed with pc */
9051:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9052:     if (iP) {
9053:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9054:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9055:       PetscOptionsEnd();
9056:     }
9057:     if (discrete_harmonic) {
9058:       Mat A;
          /* work on a copy of the local matrix whose rows and columns listed in iP are zeroed (unit diagonal) */
9059:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9060:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9061:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9062:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9063:       MatDestroy(&A);
9064:     } else {
9065:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9066:     }
9067:     MatDestroy(&change);
9068:     ISDestroy(&change_primal);
9069:   }
9070:   MatDestroy(&S_j);

9072:   /* free adjacency */
9073:   if (free_used_adj) {
9074:     PetscFree2(used_xadj,used_adjncy);
9075:   }
9076:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9077:   return(0);
9078: }

9080: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9081: {
9082:   PC_IS               *pcis=(PC_IS*)pc->data;
9083:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9084:   PCBDDCGraph         graph;
9085:   PetscErrorCode      ierr;

9088:   /* attach interface graph for determining subsets */
9089:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9090:     IS       verticesIS,verticescomm;
9091:     PetscInt vsize,*idxs;

9093:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9094:     ISGetSize(verticesIS,&vsize);
9095:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9096:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9097:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9098:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9099:     PCBDDCGraphCreate(&graph);
9100:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9101:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9102:     ISDestroy(&verticescomm);
9103:     PCBDDCGraphComputeConnectedComponents(graph);
9104:   } else {
9105:     graph = pcbddc->mat_graph;
9106:   }
9107:   /* print some info */
9108:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9109:     IS       vertices;
9110:     PetscInt nv,nedges,nfaces;
9111:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9112:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9113:     ISGetSize(vertices,&nv);
9114:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9115:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9116:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9117:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9118:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9119:     PetscViewerFlush(pcbddc->dbg_viewer);
9120:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9121:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9122:   }

9124:   /* sub_schurs init */
9125:   if (!pcbddc->sub_schurs) {
9126:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9127:   }
9128:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

9130:   /* free graph struct */
9131:   if (pcbddc->sub_schurs_rebuild) {
9132:     PCBDDCGraphDestroy(&graph);
9133:   }
9134:   return(0);
9135: }

/*
   PCBDDCCheckOperator - Debugging helper that applies the interface preconditioner to the image,
   through the local Schur complement, of a random interface vector and prints how far the result
   is from the starting vector.

   The check runs only when pcbddc->n_vertices equals pcbddc->local_primal_size. In that case:
   - a random vector on the local dofs is created, with primal entries taken from a random
     coarse vector scattered back to the subdomains;
   - the local Schur complement S_j (built from the pcis blocks, with pcbddc->ksp_D as inner solver)
     is applied and the primal entries of the result are summed across subdomains and rescaled;
   - PCBDDCApplyInterfacePreconditioner() is called and the infinity norm of the difference with the
     starting interface vector is printed by each process with PetscPrintf() on PETSC_COMM_SELF.

   NOTE(review): zerodiag is initialized to NULL and never assigned in this routine, so none of the
   "if (zerodiag)" branches below is executed as written.
   NOTE(review): ierr is declared below but never assigned in this listing; the return codes of the
   calls are not checked here.
*/
9137: PetscErrorCode PCBDDCCheckOperator(PC pc)
9138: {
9139:   PC_IS               *pcis=(PC_IS*)pc->data;
9140:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9141:   PetscErrorCode      ierr;

9144:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9145:     IS             zerodiag = NULL;
9146:     Mat            S_j,B0_B=NULL;
9147:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
9148:     PetscScalar    *p0_check,*array,*array2;
9149:     PetscReal      norm;
9150:     PetscInt       i;

9152:     /* B0 and B0_B */
9153:     if (zerodiag) {
9154:       IS       dummy;

9156:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9157:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9158:       MatCreateVecs(B0_B,NULL,&dummy_vec);
9159:       ISDestroy(&dummy);
9160:     }
9161:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
        /* vec_scale_P ends up holding the reciprocal of what each primal dof receives back after
           summing a vector of ones into pcbddc->coarse_vec.
           NOTE(review): pcbddc->coarse_vec is not zeroed before the ADD_VALUES scatter below; confirm
           that its content on entry is what is expected */
9162:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9163:     VecSet(pcbddc->vec1_P,1.0);
9164:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9165:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9166:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9167:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9168:     VecReciprocal(vec_scale_P);
9169:     /* S_j */
9170:     MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9171:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

9173:     /* mimic vector in \widetilde{W}_\Gamma */
9174:     VecSetRandom(pcis->vec1_N,NULL);
9175:     /* continuous in primal space */
9176:     VecSetRandom(pcbddc->coarse_vec,NULL);
9177:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9178:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9179:     VecGetArray(pcbddc->vec1_P,&array);
        /* p0_check keeps the last benign_n primal values for the comparison printed at the end */
9180:     PetscCalloc1(pcbddc->benign_n,&p0_check);
9181:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9182:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9183:     VecRestoreArray(pcbddc->vec1_P,&array);
9184:     VecAssemblyBegin(pcis->vec1_N);
9185:     VecAssemblyEnd(pcis->vec1_N);
9186:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9187:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
        /* vec_check_B stores the starting interface vector, used as reference at the end */
9188:     VecDuplicate(pcis->vec2_B,&vec_check_B);
9189:     VecCopy(pcis->vec2_B,vec_check_B);

9191:     /* assemble rhs for coarse problem */
9192:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9193:     /* local with Schur */
9194:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9195:     if (zerodiag) {
9196:       VecGetArray(dummy_vec,&array);
9197:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9198:       VecRestoreArray(dummy_vec,&array);
9199:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9200:     }
9201:     /* sum on primal nodes the local contributions */
9202:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9203:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9204:     VecGetArray(pcis->vec1_N,&array);
9205:     VecGetArray(pcbddc->vec1_P,&array2);
9206:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9207:     VecRestoreArray(pcbddc->vec1_P,&array2);
9208:     VecRestoreArray(pcis->vec1_N,&array);
9209:     VecSet(pcbddc->coarse_vec,0.);
9210:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9211:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9212:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9213:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
        /* NOTE(review): the array of vec1_P is obtained before VecPointwiseMult() modifies vec1_P and is
           read afterwards by VecSetValues(); this relies on "array" aliasing the storage that
           VecPointwiseMult() updates - do not reorder these statements without checking */
9214:     VecGetArray(pcbddc->vec1_P,&array);
9215:     /* scale primal nodes (BDDC sums contributions) */
9216:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9217:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9218:     VecRestoreArray(pcbddc->vec1_P,&array);
9219:     VecAssemblyBegin(pcis->vec1_N);
9220:     VecAssemblyEnd(pcis->vec1_N);
9221:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9222:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9223:     /* global: \widetilde{B0}_B w_\Gamma */
9224:     if (zerodiag) {
9225:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
9226:       VecGetArray(dummy_vec,&array);
9227:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9228:       VecRestoreArray(dummy_vec,&array);
9229:     }
9230:     /* BDDC */
9231:     VecSet(pcis->vec1_D,0.);
9232:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

        /* compare the preconditioned vector with the starting one */
9234:     VecCopy(pcis->vec1_B,pcis->vec2_B);
9235:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9236:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9237:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9238:     for (i=0;i<pcbddc->benign_n;i++) {
9239:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9240:     }
9241:     PetscFree(p0_check);
9242:     VecDestroy(&vec_scale_P);
9243:     VecDestroy(&vec_check_B);
9244:     VecDestroy(&dummy_vec);
9245:     MatDestroy(&S_j);
9246:     MatDestroy(&B0_B);
9247:   }
9248:   return(0);
9249: }

9251:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
9252: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9253: {
9254:   Mat            At;
9255:   IS             rows;
9256:   PetscInt       rst,ren;
9258:   PetscLayout    rmap;

9261:   rst = ren = 0;
9262:   if (ccomm != MPI_COMM_NULL) {
9263:     PetscLayoutCreate(ccomm,&rmap);
9264:     PetscLayoutSetSize(rmap,A->rmap->N);
9265:     PetscLayoutSetBlockSize(rmap,1);
9266:     PetscLayoutSetUp(rmap);
9267:     PetscLayoutGetRange(rmap,&rst,&ren);
9268:   }
9269:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9270:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9271:   ISDestroy(&rows);

9273:   if (ccomm != MPI_COMM_NULL) {
9274:     Mat_MPIAIJ *a,*b;
9275:     IS         from,to;
9276:     Vec        gvec;
9277:     PetscInt   lsize;

9279:     MatCreate(ccomm,B);
9280:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9281:     MatSetType(*B,MATAIJ);
9282:     PetscLayoutDestroy(&((*B)->rmap));
9283:     PetscLayoutSetUp((*B)->cmap);
9284:     a    = (Mat_MPIAIJ*)At->data;
9285:     b    = (Mat_MPIAIJ*)(*B)->data;
9286:     MPI_Comm_size(ccomm,&b->size);
9287:     MPI_Comm_rank(ccomm,&b->rank);
9288:     PetscObjectReference((PetscObject)a->A);
9289:     PetscObjectReference((PetscObject)a->B);
9290:     b->A = a->A;
9291:     b->B = a->B;

9293:     b->donotstash      = a->donotstash;
9294:     b->roworiented     = a->roworiented;
9295:     b->rowindices      = 0;
9296:     b->rowvalues       = 0;
9297:     b->getrowactive    = PETSC_FALSE;

9299:     (*B)->rmap         = rmap;
9300:     (*B)->factortype   = A->factortype;
9301:     (*B)->assembled    = PETSC_TRUE;
9302:     (*B)->insertmode   = NOT_SET_VALUES;
9303:     (*B)->preallocated = PETSC_TRUE;

9305:     if (a->colmap) {
9306: #if defined(PETSC_USE_CTABLE)
9307:       PetscTableCreateCopy(a->colmap,&b->colmap);
9308: #else
9309:       PetscMalloc1(At->cmap->N,&b->colmap);
9310:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9311:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9312: #endif
9313:     } else b->colmap = 0;
9314:     if (a->garray) {
9315:       PetscInt len;
9316:       len  = a->B->cmap->n;
9317:       PetscMalloc1(len+1,&b->garray);
9318:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9319:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9320:     } else b->garray = 0;

9322:     PetscObjectReference((PetscObject)a->lvec);
9323:     b->lvec = a->lvec;
9324:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9326:     /* cannot use VecScatterCopy */
9327:     VecGetLocalSize(b->lvec,&lsize);
9328:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9329:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9330:     MatCreateVecs(*B,&gvec,NULL);
9331:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9332:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9333:     ISDestroy(&from);
9334:     ISDestroy(&to);
9335:     VecDestroy(&gvec);
9336:   }
9337:   MatDestroy(&At);
9338:   return(0);
9339: }