Actual source code: bddcprivate.c

petsc-3.9.1 2018-04-29
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>
  5:  #include <petscdmplex.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/sfimpl.h>
  8:  #include <petsc/private/dmpleximpl.h>
  9:  #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscScalar    *uwork,*data,*U, ds = 0.;
 19:   PetscReal      *sing;
 20:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 21:   PetscInt       ulw,i,nr,nc,n;

 25: #if defined(PETSC_MISSING_LAPACK_GESVD)
 26:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 27: #else
 28:   MatGetSize(A,&nr,&nc);
 29:   if (!nr || !nc) return(0);

 31:   /* workspace */
 32:   if (!work) {
 33:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 34:     PetscMalloc1(ulw,&uwork);
 35:   } else {
 36:     ulw   = lw;
 37:     uwork = work;
 38:   }
 39:   n = PetscMin(nr,nc);
 40:   if (!rwork) {
 41:     PetscMalloc1(n,&sing);
 42:   } else {
 43:     sing = rwork;
 44:   }

 46:   /* SVD */
 47:   PetscMalloc1(nr*nr,&U);
 48:   PetscBLASIntCast(nr,&bM);
 49:   PetscBLASIntCast(nc,&bN);
 50:   PetscBLASIntCast(ulw,&lwork);
 51:   MatDenseGetArray(A,&data);
 52:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54:   PetscFPTrapPop();
 55:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 56:   MatDenseRestoreArray(A,&data);
 57:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 58:   if (!rwork) {
 59:     PetscFree(sing);
 60:   }
 61:   if (!work) {
 62:     PetscFree(uwork);
 63:   }
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 67:     MatDenseGetArray(*B,&data);
 68:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 71:     MatDenseGetArray(*B,&data);
 72:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 73:   }
 74:   MatDenseRestoreArray(*B,&data);
 75:   PetscFree(U);
 76: #endif
 77: #else /* PETSC_USE_COMPLEX */
 79:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 80: #endif
 81:   return(0);
 82: }

 84: /* TODO REMOVE */
 85: #if defined(PRINT_GDET)
 86: static int inc = 0;
 87: static int lev = 0;
 88: #endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat            GEc;
121:     PetscScalar    *vals,v;

123:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125:     MatDenseGetArray(GEd,&vals);
126:     /* v    = PetscAbsScalar(vals[0]) */;
127:     v    = 1.;
128:     cvals[0] = vals[0]/v;
129:     cvals[1] = vals[1]/v;
130:     MatDenseRestoreArray(GEd,&vals);
131:     MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133:     {
134:       PetscViewer viewer;
135:       char filename[256];
136:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139:       PetscObjectSetName((PetscObject)GEc,"GEc");
140:       MatView(GEc,viewer);
141:       PetscObjectSetName((PetscObject)(*GKins),"GK");
142:       MatView(*GKins,viewer);
143:       PetscObjectSetName((PetscObject)GEd,"Gproj");
144:       MatView(GEd,viewer);
145:       PetscViewerDestroy(&viewer);
146:     }
147: #endif
148:     MatDestroy(&GEd);
149:     MatDestroy(&GEc);
150:   }

152:   return(0);
153: }

155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
158:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
159:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160:   Vec                    tvec;
161:   PetscSF                sfv;
162:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163:   MPI_Comm               comm;
164:   IS                     lned,primals,allprimals,nedfieldlocal;
165:   IS                     *eedges,*extrows,*extcols,*alleedges;
166:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167:   PetscScalar            *vals,*work;
168:   PetscReal              *rwork;
169:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
170:   PetscInt               ne,nv,Lv,order,n,field;
171:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
172:   PetscInt               i,j,extmem,cum,maxsize,nee;
173:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174:   PetscInt               *sfvleaves,*sfvroots;
175:   PetscInt               *corners,*cedges;
176:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178:   PetscInt               *emarks;
179: #endif
180:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181:   PetscErrorCode         ierr;

184:   /* If the discrete gradient is defined for a subset of dofs and global is true,
185:      it assumes G is given in global ordering for all the dofs.
186:      Otherwise, the ordering is global for the Nedelec field */
187:   order      = pcbddc->nedorder;
188:   conforming = pcbddc->conforming;
189:   field      = pcbddc->nedfield;
190:   global     = pcbddc->nedglobal;
191:   setprimal  = PETSC_FALSE;
192:   print      = PETSC_FALSE;
193:   singular   = PETSC_FALSE;

195:   /* Command line customization */
196:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200:   /* print debug info TODO: to be removed */
201:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202:   PetscOptionsEnd();

204:   /* Return if there are no edges in the decomposition and the problem is not singular */
205:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206:   ISLocalToGlobalMappingGetSize(al2g,&n);
207:   PetscObjectGetComm((PetscObject)pc,&comm);
208:   if (!singular) {
209:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210:     lrc[0] = PETSC_FALSE;
211:     for (i=0;i<n;i++) {
212:       if (PetscRealPart(vals[i]) > 2.) {
213:         lrc[0] = PETSC_TRUE;
214:         break;
215:       }
216:     }
217:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219:     if (!lrc[1]) return(0);
220:   }

222:   /* Get Nedelec field */
223:   MatISSetUpSF(pc->pmat);
224:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
225:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
226:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
228:     ISGetLocalSize(nedfieldlocal,&ne);
229:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230:     ne            = n;
231:     nedfieldlocal = NULL;
232:     global        = PETSC_TRUE;
233:   } else if (field == PETSC_DECIDE) {
234:     PetscInt rst,ren,*idx;

236:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
237:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
238:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239:     for (i=rst;i<ren;i++) {
240:       PetscInt nc;

242:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245:     }
246:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248:     PetscMalloc1(n,&idx);
249:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251:   } else {
252:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253:   }

255:   /* Sanity checks */
256:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);

260:   /* Just set primal dofs and return */
261:   if (setprimal) {
262:     IS       enedfieldlocal;
263:     PetscInt *eidxs;

265:     PetscMalloc1(ne,&eidxs);
266:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267:     if (nedfieldlocal) {
268:       ISGetIndices(nedfieldlocal,&idxs);
269:       for (i=0,cum=0;i<ne;i++) {
270:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
271:           eidxs[cum++] = idxs[i];
272:         }
273:       }
274:       ISRestoreIndices(nedfieldlocal,&idxs);
275:     } else {
276:       for (i=0,cum=0;i<ne;i++) {
277:         if (PetscRealPart(vals[i]) > 2.) {
278:           eidxs[cum++] = i;
279:         }
280:       }
281:     }
282:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285:     PetscFree(eidxs);
286:     ISDestroy(&nedfieldlocal);
287:     ISDestroy(&enedfieldlocal);
288:     return(0);
289:   }

291:   /* Compute some l2g maps */
292:   if (nedfieldlocal) {
293:     IS is;

295:     /* need to map from the local Nedelec field to local numbering */
296:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299:     ISLocalToGlobalMappingCreateIS(is,&al2g);
300:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301:     if (global) {
302:       PetscObjectReference((PetscObject)al2g);
303:       el2g = al2g;
304:     } else {
305:       IS gis;

307:       ISRenumber(is,NULL,NULL,&gis);
308:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
309:       ISDestroy(&gis);
310:     }
311:     ISDestroy(&is);
312:   } else {
313:     /* restore default */
314:     pcbddc->nedfield = -1;
315:     /* one ref for the destruction of al2g, one for el2g */
316:     PetscObjectReference((PetscObject)al2g);
317:     PetscObjectReference((PetscObject)al2g);
318:     el2g = al2g;
319:     fl2g = NULL;
320:   }

322:   /* Start communication to drop connections for interior edges (for cc analysis only) */
323:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
324:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
325:   if (nedfieldlocal) {
326:     ISGetIndices(nedfieldlocal,&idxs);
327:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328:     ISRestoreIndices(nedfieldlocal,&idxs);
329:   } else {
330:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331:   }
332:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

335:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338:     if (global) {
339:       PetscInt rst;

341:       MatGetOwnershipRange(G,&rst,NULL);
342:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343:         if (matis->sf_rootdata[i] < 2) {
344:           matis->sf_rootdata[cum++] = i + rst;
345:         }
346:       }
347:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349:     } else {
350:       PetscInt *tbz;

352:       PetscMalloc1(ne,&tbz);
353:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355:       ISGetIndices(nedfieldlocal,&idxs);
356:       for (i=0,cum=0;i<ne;i++)
357:         if (matis->sf_leafdata[idxs[i]] == 1)
358:           tbz[cum++] = i;
359:       ISRestoreIndices(nedfieldlocal,&idxs);
360:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362:       PetscFree(tbz);
363:     }
364:   } else { /* we need the entire G to infer the nullspace */
365:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
366:     G    = pcbddc->discretegradient;
367:   }

369:   /* Extract subdomain relevant rows of G */
370:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374:   ISDestroy(&lned);
375:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376:   MatDestroy(&lGall);
377:   MatISGetLocalMat(lGis,&lG);

379:   /* SF for nodal dofs communications */
380:   MatGetLocalSize(G,NULL,&Lv);
381:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382:   PetscObjectReference((PetscObject)vl2g);
383:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
384:   PetscSFCreate(comm,&sfv);
385:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388:   i    = singular ? 2 : 1;
389:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

391:   /* Destroy temporary G created in MATIS format and modified G */
392:   PetscObjectReference((PetscObject)lG);
393:   MatDestroy(&lGis);
394:   MatDestroy(&G);

396:   if (print) {
397:     PetscObjectSetName((PetscObject)lG,"initial_lG");
398:     MatView(lG,NULL);
399:   }

401:   /* Save lG for values insertion in change of basis */
402:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

404:   /* Analyze the edge-nodes connections (duplicate lG) */
405:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407:   PetscBTCreate(nv,&btv);
408:   PetscBTCreate(ne,&bte);
409:   PetscBTCreate(ne,&btb);
410:   PetscBTCreate(ne,&btbd);
411:   PetscBTCreate(nv,&btvcand);
412:   /* need to import the boundary specification to ensure the
413:      proper detection of coarse edges' endpoints */
414:   if (pcbddc->DirichletBoundariesLocal) {
415:     IS is;

417:     if (fl2g) {
418:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419:     } else {
420:       is = pcbddc->DirichletBoundariesLocal;
421:     }
422:     ISGetLocalSize(is,&cum);
423:     ISGetIndices(is,&idxs);
424:     for (i=0;i<cum;i++) {
425:       if (idxs[i] >= 0) {
426:         PetscBTSet(btb,idxs[i]);
427:         PetscBTSet(btbd,idxs[i]);
428:       }
429:     }
430:     ISRestoreIndices(is,&idxs);
431:     if (fl2g) {
432:       ISDestroy(&is);
433:     }
434:   }
435:   if (pcbddc->NeumannBoundariesLocal) {
436:     IS is;

438:     if (fl2g) {
439:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440:     } else {
441:       is = pcbddc->NeumannBoundariesLocal;
442:     }
443:     ISGetLocalSize(is,&cum);
444:     ISGetIndices(is,&idxs);
445:     for (i=0;i<cum;i++) {
446:       if (idxs[i] >= 0) {
447:         PetscBTSet(btb,idxs[i]);
448:       }
449:     }
450:     ISRestoreIndices(is,&idxs);
451:     if (fl2g) {
452:       ISDestroy(&is);
453:     }
454:   }

456:   /* Count neighs per dof */
457:   PetscCalloc1(ne,&ecount);
458:   PetscMalloc1(ne,&eneighs);
459:   ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
460:   for (i=1,cum=0;i<n_neigh;i++) {
461:     cum += n_shared[i];
462:     for (j=0;j<n_shared[i];j++) {
463:       ecount[shared[i][j]]++;
464:     }
465:   }
466:   if (ne) {
467:     PetscMalloc1(cum,&eneighs[0]);
468:   }
469:   for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
470:   PetscMemzero(ecount,ne*sizeof(PetscInt));
471:   for (i=1;i<n_neigh;i++) {
472:     for (j=0;j<n_shared[i];j++) {
473:       PetscInt k = shared[i][j];
474:       eneighs[k][ecount[k]] = neigh[i];
475:       ecount[k]++;
476:     }
477:   }
478:   for (i=0;i<ne;i++) {
479:     PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
480:   }
481:   ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
482:   PetscCalloc1(nv,&vcount);
483:   PetscMalloc1(nv,&vneighs);
484:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
485:   for (i=1,cum=0;i<n_neigh;i++) {
486:     cum += n_shared[i];
487:     for (j=0;j<n_shared[i];j++) {
488:       vcount[shared[i][j]]++;
489:     }
490:   }
491:   if (nv) {
492:     PetscMalloc1(cum,&vneighs[0]);
493:   }
494:   for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
495:   PetscMemzero(vcount,nv*sizeof(PetscInt));
496:   for (i=1;i<n_neigh;i++) {
497:     for (j=0;j<n_shared[i];j++) {
498:       PetscInt k = shared[i][j];
499:       vneighs[k][vcount[k]] = neigh[i];
500:       vcount[k]++;
501:     }
502:   }
503:   for (i=0;i<nv;i++) {
504:     PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
505:   }
506:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

508:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
509:      for proper detection of coarse edges' endpoints */
510:   PetscBTCreate(ne,&btee);
511:   for (i=0;i<ne;i++) {
512:     if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
513:       PetscBTSet(btee,i);
514:     }
515:   }
516:   PetscMalloc1(ne,&marks);
517:   if (!conforming) {
518:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
519:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
520:   }
521:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
522:   MatSeqAIJGetArray(lGe,&vals);
523:   cum  = 0;
524:   for (i=0;i<ne;i++) {
525:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
526:     if (!PetscBTLookup(btee,i)) {
527:       marks[cum++] = i;
528:       continue;
529:     }
530:     /* set badly connected edge dofs as primal */
531:     if (!conforming) {
532:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
533:         marks[cum++] = i;
534:         PetscBTSet(bte,i);
535:         for (j=ii[i];j<ii[i+1];j++) {
536:           PetscBTSet(btv,jj[j]);
537:         }
538:       } else {
539:         /* every edge dofs should be connected trough a certain number of nodal dofs
540:            to other edge dofs belonging to coarse edges
541:            - at most 2 endpoints
542:            - order-1 interior nodal dofs
543:            - no undefined nodal dofs (nconn < order)
544:         */
545:         PetscInt ends = 0,ints = 0, undef = 0;
546:         for (j=ii[i];j<ii[i+1];j++) {
547:           PetscInt v = jj[j],k;
548:           PetscInt nconn = iit[v+1]-iit[v];
549:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
550:           if (nconn > order) ends++;
551:           else if (nconn == order) ints++;
552:           else undef++;
553:         }
554:         if (undef || ends > 2 || ints != order -1) {
555:           marks[cum++] = i;
556:           PetscBTSet(bte,i);
557:           for (j=ii[i];j<ii[i+1];j++) {
558:             PetscBTSet(btv,jj[j]);
559:           }
560:         }
561:       }
562:     }
563:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
564:     if (!order && ii[i+1] != ii[i]) {
565:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
566:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
567:     }
568:   }
569:   PetscBTDestroy(&btee);
570:   MatSeqAIJRestoreArray(lGe,&vals);
571:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
572:   if (!conforming) {
573:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
574:     MatDestroy(&lGt);
575:   }
576:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

578:   /* identify splitpoints and corner candidates */
579:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
580:   if (print) {
581:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
582:     MatView(lGe,NULL);
583:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
584:     MatView(lGt,NULL);
585:   }
586:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
587:   MatSeqAIJGetArray(lGt,&vals);
588:   for (i=0;i<nv;i++) {
589:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
590:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
591:     if (!order) { /* variable order */
592:       PetscReal vorder = 0.;

594:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
595:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
596:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
597:       ord  = 1;
598:     }
599: #if defined(PETSC_USE_DEBUG)
600:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
601: #endif
602:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
603:       if (PetscBTLookup(btbd,jj[j])) {
604:         bdir = PETSC_TRUE;
605:         break;
606:       }
607:       if (vc != ecount[jj[j]]) {
608:         sneighs = PETSC_FALSE;
609:       } else {
610:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
611:         for (k=0;k<vc;k++) {
612:           if (vn[k] != en[k]) {
613:             sneighs = PETSC_FALSE;
614:             break;
615:           }
616:         }
617:       }
618:     }
619:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
620:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
621:       PetscBTSet(btv,i);
622:     } else if (test == ord) {
623:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
624:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
625:         PetscBTSet(btv,i);
626:       } else {
627:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
628:         PetscBTSet(btvcand,i);
629:       }
630:     }
631:   }
632:   PetscFree(ecount);
633:   PetscFree(vcount);
634:   if (ne) {
635:     PetscFree(eneighs[0]);
636:   }
637:   if (nv) {
638:     PetscFree(vneighs[0]);
639:   }
640:   PetscFree(eneighs);
641:   PetscFree(vneighs);
642:   PetscBTDestroy(&btbd);

644:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
645:   if (order != 1) {
646:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
647:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
648:     for (i=0;i<nv;i++) {
649:       if (PetscBTLookup(btvcand,i)) {
650:         PetscBool found = PETSC_FALSE;
651:         for (j=ii[i];j<ii[i+1] && !found;j++) {
652:           PetscInt k,e = jj[j];
653:           if (PetscBTLookup(bte,e)) continue;
654:           for (k=iit[e];k<iit[e+1];k++) {
655:             PetscInt v = jjt[k];
656:             if (v != i && PetscBTLookup(btvcand,v)) {
657:               found = PETSC_TRUE;
658:               break;
659:             }
660:           }
661:         }
662:         if (!found) {
663:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d CLEARED\n",i);
664:           PetscBTClear(btvcand,i);
665:         } else {
666:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d ACCEPTED\n",i);
667:         }
668:       }
669:     }
670:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
671:   }
672:   MatSeqAIJRestoreArray(lGt,&vals);
673:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
674:   MatDestroy(&lGe);

676:   /* Get the local G^T explicitly */
677:   MatDestroy(&lGt);
678:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
679:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

681:   /* Mark interior nodal dofs */
682:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
683:   PetscBTCreate(nv,&btvi);
684:   for (i=1;i<n_neigh;i++) {
685:     for (j=0;j<n_shared[i];j++) {
686:       PetscBTSet(btvi,shared[i][j]);
687:     }
688:   }
689:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

691:   /* communicate corners and splitpoints */
692:   PetscMalloc1(nv,&vmarks);
693:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
694:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
695:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

697:   if (print) {
698:     IS tbz;

700:     cum = 0;
701:     for (i=0;i<nv;i++)
702:       if (sfvleaves[i])
703:         vmarks[cum++] = i;

705:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
706:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
707:     ISView(tbz,NULL);
708:     ISDestroy(&tbz);
709:   }

711:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
712:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
713:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
714:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

716:   /* Zero rows of lGt corresponding to identified corners
717:      and interior nodal dofs */
718:   cum = 0;
719:   for (i=0;i<nv;i++) {
720:     if (sfvleaves[i]) {
721:       vmarks[cum++] = i;
722:       PetscBTSet(btv,i);
723:     }
724:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
725:   }
726:   PetscBTDestroy(&btvi);
727:   if (print) {
728:     IS tbz;

730:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
731:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
732:     ISView(tbz,NULL);
733:     ISDestroy(&tbz);
734:   }
735:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
736:   PetscFree(vmarks);
737:   PetscSFDestroy(&sfv);
738:   PetscFree2(sfvleaves,sfvroots);

740:   /* Recompute G */
741:   MatDestroy(&lG);
742:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
743:   if (print) {
744:     PetscObjectSetName((PetscObject)lG,"used_lG");
745:     MatView(lG,NULL);
746:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
747:     MatView(lGt,NULL);
748:   }

750:   /* Get primal dofs (if any) */
751:   cum = 0;
752:   for (i=0;i<ne;i++) {
753:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
754:   }
755:   if (fl2g) {
756:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
757:   }
758:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
759:   if (print) {
760:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
761:     ISView(primals,NULL);
762:   }
763:   PetscBTDestroy(&bte);
764:   /* TODO: what if the user passed in some of them ?  */
765:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
766:   ISDestroy(&primals);

768:   /* Compute edge connectivity */
769:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
770:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
771:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
772:   if (fl2g) {
773:     PetscBT   btf;
774:     PetscInt  *iia,*jja,*iiu,*jju;
775:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

777:     /* create CSR for all local dofs */
778:     PetscMalloc1(n+1,&iia);
779:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
780:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
781:       iiu = pcbddc->mat_graph->xadj;
782:       jju = pcbddc->mat_graph->adjncy;
783:     } else if (pcbddc->use_local_adj) {
784:       rest = PETSC_TRUE;
785:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
786:     } else {
787:       free   = PETSC_TRUE;
788:       PetscMalloc2(n+1,&iiu,n,&jju);
789:       iiu[0] = 0;
790:       for (i=0;i<n;i++) {
791:         iiu[i+1] = i+1;
792:         jju[i]   = -1;
793:       }
794:     }

796:     /* import sizes of CSR */
797:     iia[0] = 0;
798:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

800:     /* overwrite entries corresponding to the Nedelec field */
801:     PetscBTCreate(n,&btf);
802:     ISGetIndices(nedfieldlocal,&idxs);
803:     for (i=0;i<ne;i++) {
804:       PetscBTSet(btf,idxs[i]);
805:       iia[idxs[i]+1] = ii[i+1]-ii[i];
806:     }

808:     /* iia in CSR */
809:     for (i=0;i<n;i++) iia[i+1] += iia[i];

811:     /* jja in CSR */
812:     PetscMalloc1(iia[n],&jja);
813:     for (i=0;i<n;i++)
814:       if (!PetscBTLookup(btf,i))
815:         for (j=0;j<iiu[i+1]-iiu[i];j++)
816:           jja[iia[i]+j] = jju[iiu[i]+j];

818:     /* map edge dofs connectivity */
819:     if (jj) {
820:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
821:       for (i=0;i<ne;i++) {
822:         PetscInt e = idxs[i];
823:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
824:       }
825:     }
826:     ISRestoreIndices(nedfieldlocal,&idxs);
827:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
828:     if (rest) {
829:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
830:     }
831:     if (free) {
832:       PetscFree2(iiu,jju);
833:     }
834:     PetscBTDestroy(&btf);
835:   } else {
836:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
837:   }

839:   /* Analyze interface for edge dofs */
840:   PCBDDCAnalyzeInterface(pc);
841:   pcbddc->mat_graph->twodim = PETSC_FALSE;

843:   /* Get coarse edges in the edge space */
844:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
845:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

847:   if (fl2g) {
848:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
849:     PetscMalloc1(nee,&eedges);
850:     for (i=0;i<nee;i++) {
851:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
852:     }
853:   } else {
854:     eedges  = alleedges;
855:     primals = allprimals;
856:   }

858:   /* Mark fine edge dofs with their coarse edge id */
859:   PetscMemzero(marks,ne*sizeof(PetscInt));
860:   ISGetLocalSize(primals,&cum);
861:   ISGetIndices(primals,&idxs);
862:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
863:   ISRestoreIndices(primals,&idxs);
864:   if (print) {
865:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
866:     ISView(primals,NULL);
867:   }

869:   maxsize = 0;
870:   for (i=0;i<nee;i++) {
871:     PetscInt size,mark = i+1;

873:     ISGetLocalSize(eedges[i],&size);
874:     ISGetIndices(eedges[i],&idxs);
875:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
876:     ISRestoreIndices(eedges[i],&idxs);
877:     maxsize = PetscMax(maxsize,size);
878:   }

880:   /* Find coarse edge endpoints */
881:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
882:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
883:   for (i=0;i<nee;i++) {
884:     PetscInt mark = i+1,size;

886:     ISGetLocalSize(eedges[i],&size);
887:     if (!size && nedfieldlocal) continue;
888:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
889:     ISGetIndices(eedges[i],&idxs);
890:     if (print) {
891:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
892:       ISView(eedges[i],NULL);
893:     }
894:     for (j=0;j<size;j++) {
895:       PetscInt k, ee = idxs[j];
896:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %d\n",ee);
897:       for (k=ii[ee];k<ii[ee+1];k++) {
898:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %d\n",jj[k]);
899:         if (PetscBTLookup(btv,jj[k])) {
900:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %d\n",jj[k]);
901:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
902:           PetscInt  k2;
903:           PetscBool corner = PETSC_FALSE;
904:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
905:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
906:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
907:                if the edge dof lie on the natural part of the boundary */
908:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
909:               corner = PETSC_TRUE;
910:               break;
911:             }
912:           }
913:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
914:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %d\n",jj[k]);
915:             PetscBTSet(btv,jj[k]);
916:           } else {
917:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
918:           }
919:         }
920:       }
921:     }
922:     ISRestoreIndices(eedges[i],&idxs);
923:   }
924:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
925:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
926:   PetscBTDestroy(&btb);

928:   /* Reset marked primal dofs */
929:   ISGetLocalSize(primals,&cum);
930:   ISGetIndices(primals,&idxs);
931:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
932:   ISRestoreIndices(primals,&idxs);

934:   /* Now use the initial lG */
935:   MatDestroy(&lG);
936:   MatDestroy(&lGt);
937:   lG   = lGinit;
938:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

940:   /* Compute extended cols indices */
941:   PetscBTCreate(nv,&btvc);
942:   PetscBTCreate(nee,&bter);
943:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
944:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
945:   i   *= maxsize;
946:   PetscCalloc1(nee,&extcols);
947:   PetscMalloc2(i,&extrow,i,&gidxs);
948:   eerr = PETSC_FALSE;
949:   for (i=0;i<nee;i++) {
950:     PetscInt size,found = 0;

952:     cum  = 0;
953:     ISGetLocalSize(eedges[i],&size);
954:     if (!size && nedfieldlocal) continue;
955:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
956:     ISGetIndices(eedges[i],&idxs);
957:     PetscBTMemzero(nv,btvc);
958:     for (j=0;j<size;j++) {
959:       PetscInt k,ee = idxs[j];
960:       for (k=ii[ee];k<ii[ee+1];k++) {
961:         PetscInt vv = jj[k];
962:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
963:         else if (!PetscBTLookupSet(btvc,vv)) found++;
964:       }
965:     }
966:     ISRestoreIndices(eedges[i],&idxs);
967:     PetscSortRemoveDupsInt(&cum,extrow);
968:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
969:     PetscSortIntWithArray(cum,gidxs,extrow);
970:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
971:     /* it may happen that endpoints are not defined at this point
972:        if it is the case, mark this edge for a second pass */
973:     if (cum != size -1 || found != 2) {
974:       PetscBTSet(bter,i);
975:       if (print) {
976:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
977:         ISView(eedges[i],NULL);
978:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
979:         ISView(extcols[i],NULL);
980:       }
981:       eerr = PETSC_TRUE;
982:     }
983:   }
984:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
985:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
986:   if (done) {
987:     PetscInt *newprimals;

989:     PetscMalloc1(ne,&newprimals);
990:     ISGetLocalSize(primals,&cum);
991:     ISGetIndices(primals,&idxs);
992:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
993:     ISRestoreIndices(primals,&idxs);
994:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
995:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
996:     for (i=0;i<nee;i++) {
997:       PetscBool has_candidates = PETSC_FALSE;
998:       if (PetscBTLookup(bter,i)) {
999:         PetscInt size,mark = i+1;

1001:         ISGetLocalSize(eedges[i],&size);
1002:         ISGetIndices(eedges[i],&idxs);
1003:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1004:         for (j=0;j<size;j++) {
1005:           PetscInt k,ee = idxs[j];
1006:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1007:           for (k=ii[ee];k<ii[ee+1];k++) {
1008:             /* set all candidates located on the edge as corners */
1009:             if (PetscBTLookup(btvcand,jj[k])) {
1010:               PetscInt k2,vv = jj[k];
1011:               has_candidates = PETSC_TRUE;
1012:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %d\n",vv);
1013:               PetscBTSet(btv,vv);
1014:               /* set all edge dofs connected to candidate as primals */
1015:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
1016:                 if (marks[jjt[k2]] == mark) {
1017:                   PetscInt k3,ee2 = jjt[k2];
1018:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %d\n",ee2);
1019:                   newprimals[cum++] = ee2;
1020:                   /* finally set the new corners */
1021:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1022:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %d\n",jj[k3]);
1023:                     PetscBTSet(btv,jj[k3]);
1024:                   }
1025:                 }
1026:               }
1027:             } else {
1028:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %d\n",jj[k]);
1029:             }
1030:           }
1031:         }
1032:         if (!has_candidates) { /* circular edge */
1033:           PetscInt k, ee = idxs[0],*tmarks;

1035:           PetscCalloc1(ne,&tmarks);
1036:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %d\n",i);
1037:           for (k=ii[ee];k<ii[ee+1];k++) {
1038:             PetscInt k2;
1039:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %d\n",jj[k]);
1040:             PetscBTSet(btv,jj[k]);
1041:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1042:           }
1043:           for (j=0;j<size;j++) {
1044:             if (tmarks[idxs[j]] > 1) {
1045:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %d\n",idxs[j]);
1046:               newprimals[cum++] = idxs[j];
1047:             }
1048:           }
1049:           PetscFree(tmarks);
1050:         }
1051:         ISRestoreIndices(eedges[i],&idxs);
1052:       }
1053:       ISDestroy(&extcols[i]);
1054:     }
1055:     PetscFree(extcols);
1056:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1057:     PetscSortRemoveDupsInt(&cum,newprimals);
1058:     if (fl2g) {
1059:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1060:       ISDestroy(&primals);
1061:       for (i=0;i<nee;i++) {
1062:         ISDestroy(&eedges[i]);
1063:       }
1064:       PetscFree(eedges);
1065:     }
1066:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1067:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1068:     PetscFree(newprimals);
1069:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1070:     ISDestroy(&primals);
1071:     PCBDDCAnalyzeInterface(pc);
1072:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1073:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1074:     if (fl2g) {
1075:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1076:       PetscMalloc1(nee,&eedges);
1077:       for (i=0;i<nee;i++) {
1078:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1079:       }
1080:     } else {
1081:       eedges  = alleedges;
1082:       primals = allprimals;
1083:     }
1084:     PetscCalloc1(nee,&extcols);

1086:     /* Mark again */
1087:     PetscMemzero(marks,ne*sizeof(PetscInt));
1088:     for (i=0;i<nee;i++) {
1089:       PetscInt size,mark = i+1;

1091:       ISGetLocalSize(eedges[i],&size);
1092:       ISGetIndices(eedges[i],&idxs);
1093:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1094:       ISRestoreIndices(eedges[i],&idxs);
1095:     }
1096:     if (print) {
1097:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1098:       ISView(primals,NULL);
1099:     }

1101:     /* Recompute extended cols */
1102:     eerr = PETSC_FALSE;
1103:     for (i=0;i<nee;i++) {
1104:       PetscInt size;

1106:       cum  = 0;
1107:       ISGetLocalSize(eedges[i],&size);
1108:       if (!size && nedfieldlocal) continue;
1109:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1110:       ISGetIndices(eedges[i],&idxs);
1111:       for (j=0;j<size;j++) {
1112:         PetscInt k,ee = idxs[j];
1113:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1114:       }
1115:       ISRestoreIndices(eedges[i],&idxs);
1116:       PetscSortRemoveDupsInt(&cum,extrow);
1117:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1118:       PetscSortIntWithArray(cum,gidxs,extrow);
1119:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1120:       if (cum != size -1) {
1121:         if (print) {
1122:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1123:           ISView(eedges[i],NULL);
1124:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1125:           ISView(extcols[i],NULL);
1126:         }
1127:         eerr = PETSC_TRUE;
1128:       }
1129:     }
1130:   }
1131:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1132:   PetscFree2(extrow,gidxs);
1133:   PetscBTDestroy(&bter);
1134:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1135:   /* an error should not occur at this point */
1136:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1138:   /* Check the number of endpoints */
1139:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140:   PetscMalloc1(2*nee,&corners);
1141:   PetscMalloc1(nee,&cedges);
1142:   for (i=0;i<nee;i++) {
1143:     PetscInt size, found = 0, gc[2];

1145:     /* init with defaults */
1146:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1147:     ISGetLocalSize(eedges[i],&size);
1148:     if (!size && nedfieldlocal) continue;
1149:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1150:     ISGetIndices(eedges[i],&idxs);
1151:     PetscBTMemzero(nv,btvc);
1152:     for (j=0;j<size;j++) {
1153:       PetscInt k,ee = idxs[j];
1154:       for (k=ii[ee];k<ii[ee+1];k++) {
1155:         PetscInt vv = jj[k];
1156:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1157:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1158:           corners[i*2+found++] = vv;
1159:         }
1160:       }
1161:     }
1162:     if (found != 2) {
1163:       PetscInt e;
1164:       if (fl2g) {
1165:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1166:       } else {
1167:         e = idxs[0];
1168:       }
1169:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1170:     }

1172:     /* get primal dof index on this coarse edge */
1173:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1174:     if (gc[0] > gc[1]) {
1175:       PetscInt swap  = corners[2*i];
1176:       corners[2*i]   = corners[2*i+1];
1177:       corners[2*i+1] = swap;
1178:     }
1179:     cedges[i] = idxs[size-1];
1180:     ISRestoreIndices(eedges[i],&idxs);
1181:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1182:   }
1183:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1184:   PetscBTDestroy(&btvc);

1186: #if defined(PETSC_USE_DEBUG)
1187:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1188:      not interfere with neighbouring coarse edges */
1189:   PetscMalloc1(nee+1,&emarks);
1190:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1191:   for (i=0;i<nv;i++) {
1192:     PetscInt emax = 0,eemax = 0;

1194:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1195:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1196:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1197:     for (j=1;j<nee+1;j++) {
1198:       if (emax < emarks[j]) {
1199:         emax = emarks[j];
1200:         eemax = j;
1201:       }
1202:     }
1203:     /* not relevant for edges */
1204:     if (!eemax) continue;

1206:     for (j=ii[i];j<ii[i+1];j++) {
1207:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1208:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1209:       }
1210:     }
1211:   }
1212:   PetscFree(emarks);
1213:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1214: #endif

1216:   /* Compute extended rows indices for edge blocks of the change of basis */
1217:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1218:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1219:   extmem *= maxsize;
1220:   PetscMalloc1(extmem*nee,&extrow);
1221:   PetscMalloc1(nee,&extrows);
1222:   PetscCalloc1(nee,&extrowcum);
1223:   for (i=0;i<nv;i++) {
1224:     PetscInt mark = 0,size,start;

1226:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1227:     for (j=ii[i];j<ii[i+1];j++)
1228:       if (marks[jj[j]] && !mark)
1229:         mark = marks[jj[j]];

1231:     /* not relevant */
1232:     if (!mark) continue;

1234:     /* import extended row */
1235:     mark--;
1236:     start = mark*extmem+extrowcum[mark];
1237:     size = ii[i+1]-ii[i];
1238:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1239:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1240:     extrowcum[mark] += size;
1241:   }
1242:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1243:   MatDestroy(&lGt);
1244:   PetscFree(marks);

1246:   /* Compress extrows */
1247:   cum  = 0;
1248:   for (i=0;i<nee;i++) {
1249:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1250:     PetscSortRemoveDupsInt(&size,start);
1251:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1252:     cum  = PetscMax(cum,size);
1253:   }
1254:   PetscFree(extrowcum);
1255:   PetscBTDestroy(&btv);
1256:   PetscBTDestroy(&btvcand);

1258:   /* Workspace for lapack inner calls and VecSetValues */
1259:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1261:   /* Create change of basis matrix (preallocation can be improved) */
1262:   MatCreate(comm,&T);
1263:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1264:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1265:   MatSetType(T,MATAIJ);
1266:   MatSeqAIJSetPreallocation(T,10,NULL);
1267:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1268:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1269:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1270:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1271:   ISLocalToGlobalMappingDestroy(&al2g);

1273:   /* Defaults to identity */
1274:   MatCreateVecs(pc->pmat,&tvec,NULL);
1275:   VecSet(tvec,1.0);
1276:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1277:   VecDestroy(&tvec);

1279:   /* Create discrete gradient for the coarser level if needed */
1280:   MatDestroy(&pcbddc->nedcG);
1281:   ISDestroy(&pcbddc->nedclocal);
1282:   if (pcbddc->current_level < pcbddc->max_levels) {
1283:     ISLocalToGlobalMapping cel2g,cvl2g;
1284:     IS                     wis,gwis;
1285:     PetscInt               cnv,cne;

1287:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1288:     if (fl2g) {
1289:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1290:     } else {
1291:       PetscObjectReference((PetscObject)wis);
1292:       pcbddc->nedclocal = wis;
1293:     }
1294:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1295:     ISDestroy(&wis);
1296:     ISRenumber(gwis,NULL,&cne,&wis);
1297:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1298:     ISDestroy(&wis);
1299:     ISDestroy(&gwis);

1301:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1302:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1303:     ISDestroy(&wis);
1304:     ISRenumber(gwis,NULL,&cnv,&wis);
1305:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1306:     ISDestroy(&wis);
1307:     ISDestroy(&gwis);

1309:     MatCreate(comm,&pcbddc->nedcG);
1310:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1311:     MatSetType(pcbddc->nedcG,MATAIJ);
1312:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1313:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1314:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1315:     ISLocalToGlobalMappingDestroy(&cel2g);
1316:     ISLocalToGlobalMappingDestroy(&cvl2g);
1317:   }
1318:   ISLocalToGlobalMappingDestroy(&vl2g);

1320: #if defined(PRINT_GDET)
1321:   inc = 0;
1322:   lev = pcbddc->current_level;
1323: #endif

1325:   /* Insert values in the change of basis matrix */
1326:   for (i=0;i<nee;i++) {
1327:     Mat         Gins = NULL, GKins = NULL;
1328:     IS          cornersis = NULL;
1329:     PetscScalar cvals[2];

1331:     if (pcbddc->nedcG) {
1332:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1333:     }
1334:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1335:     if (Gins && GKins) {
1336:       PetscScalar    *data;
1337:       const PetscInt *rows,*cols;
1338:       PetscInt       nrh,nch,nrc,ncc;

1340:       ISGetIndices(eedges[i],&cols);
1341:       /* H1 */
1342:       ISGetIndices(extrows[i],&rows);
1343:       MatGetSize(Gins,&nrh,&nch);
1344:       MatDenseGetArray(Gins,&data);
1345:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1346:       MatDenseRestoreArray(Gins,&data);
1347:       ISRestoreIndices(extrows[i],&rows);
1348:       /* complement */
1349:       MatGetSize(GKins,&nrc,&ncc);
1350:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1351:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1352:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1353:       MatDenseGetArray(GKins,&data);
1354:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1355:       MatDenseRestoreArray(GKins,&data);

1357:       /* coarse discrete gradient */
1358:       if (pcbddc->nedcG) {
1359:         PetscInt cols[2];

1361:         cols[0] = 2*i;
1362:         cols[1] = 2*i+1;
1363:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1364:       }
1365:       ISRestoreIndices(eedges[i],&cols);
1366:     }
1367:     ISDestroy(&extrows[i]);
1368:     ISDestroy(&extcols[i]);
1369:     ISDestroy(&cornersis);
1370:     MatDestroy(&Gins);
1371:     MatDestroy(&GKins);
1372:   }
1373:   ISLocalToGlobalMappingDestroy(&el2g);

1375:   /* Start assembling */
1376:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1377:   if (pcbddc->nedcG) {
1378:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1379:   }

1381:   /* Free */
1382:   if (fl2g) {
1383:     ISDestroy(&primals);
1384:     for (i=0;i<nee;i++) {
1385:       ISDestroy(&eedges[i]);
1386:     }
1387:     PetscFree(eedges);
1388:   }

1390:   /* hack mat_graph with primal dofs on the coarse edges */
1391:   {
1392:     PCBDDCGraph graph   = pcbddc->mat_graph;
1393:     PetscInt    *oqueue = graph->queue;
1394:     PetscInt    *ocptr  = graph->cptr;
1395:     PetscInt    ncc,*idxs;

1397:     /* find first primal edge */
1398:     if (pcbddc->nedclocal) {
1399:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1400:     } else {
1401:       if (fl2g) {
1402:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1403:       }
1404:       idxs = cedges;
1405:     }
1406:     cum = 0;
1407:     while (cum < nee && cedges[cum] < 0) cum++;

1409:     /* adapt connected components */
1410:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1411:     graph->cptr[0] = 0;
1412:     for (i=0,ncc=0;i<graph->ncc;i++) {
1413:       PetscInt lc = ocptr[i+1]-ocptr[i];
1414:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1415:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1416:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1417:         ncc++;
1418:         lc--;
1419:         cum++;
1420:         while (cum < nee && cedges[cum] < 0) cum++;
1421:       }
1422:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1423:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1424:       ncc++;
1425:     }
1426:     graph->ncc = ncc;
1427:     if (pcbddc->nedclocal) {
1428:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1429:     }
1430:     PetscFree2(ocptr,oqueue);
1431:   }
1432:   ISLocalToGlobalMappingDestroy(&fl2g);
1433:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1434:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1435:   MatDestroy(&conn);

1437:   ISDestroy(&nedfieldlocal);
1438:   PetscFree(extrow);
1439:   PetscFree2(work,rwork);
1440:   PetscFree(corners);
1441:   PetscFree(cedges);
1442:   PetscFree(extrows);
1443:   PetscFree(extcols);
1444:   MatDestroy(&lG);

1446:   /* Complete assembling */
1447:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1448:   if (pcbddc->nedcG) {
1449:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1450: #if 0
1451:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1452:     MatView(pcbddc->nedcG,NULL);
1453: #endif
1454:   }

1456:   /* set change of basis */
1457:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1458:   MatDestroy(&T);

1460:   return(0);
1461: }

1463: /* the near-null space of BDDC carries information on quadrature weights,
1464:    and these can be collinear -> so cheat with MatNullSpaceCreate
1465:    and create a suitable set of basis vectors first */
/*
   PCBDDCNullSpaceCreate - calls MatNullSpaceCreate() on quad_vecs after temporarily
   writing placeholder entries into them, then resets those entries to zero.

   Input parameters:
     comm      - communicator forwarded to MatNullSpaceCreate()
     has_const - forwarded to MatNullSpaceCreate(); also selects which placeholder
                 entries are written (see below)
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors handed to MatNullSpaceCreate(); modified temporarily

   Output parameter:
     nnsp - the object produced by MatNullSpaceCreate()

   Placeholder entries: on the process whose ownership range [first,last) contains i,
   vector i gets a 1 at local offset i-first when has_const is false, or the pair
   (1/sqrt(2), -1/sqrt(2)) at local offsets 2*i-first and 2*i-first+1 when it is true.
   NOTE(review): presumably these entries exist so that the (otherwise zero) vectors
   are accepted by MatNullSpaceCreate() - confirm against that routine.

   Errors: with has_const true, a local size smaller than 2*nvecs raises PETSC_ERR_SUP.
*/
1466: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1467: {
1469:   PetscInt       i;

        /* first pass: write the placeholder entries */
1472:   for (i=0;i<nvecs;i++) {
1473:     PetscInt first,last;

1475:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
          /* the has_const layout needs two local entries per vector */
1476:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* only the process owning global row i writes into vector i */
1477:     if (i>=first && i < last) {
1478:       PetscScalar *data;
1479:       VecGetArray(quad_vecs[i],&data);
1480:       if (!has_const) {
1481:         data[i-first] = 1.;
1482:       } else {
1483:         data[2*i-first] = 1./PetscSqrtReal(2.);
1484:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1485:       }
1486:       VecRestoreArray(quad_vecs[i],&data);
1487:     }
          /* executed on every process, whether or not it wrote into the vector */
1488:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1489:   }
1490:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1491:   for (i=0;i<nvecs;i++) { /* reset vectors */
1492:     PetscInt first,last;
          /* NOTE(review): presumably MatNullSpaceCreate() left the vectors read-locked;
             the lock is popped here so the entries can be zeroed - confirm */
1493:     VecLockPop(quad_vecs[i]);
1494:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1495:     if (i>=first && i < last) {
1496:       PetscScalar *data;
1497:       VecGetArray(quad_vecs[i],&data);
1498:       if (!has_const) {
1499:         data[i-first] = 0.;
1500:       } else {
1501:         data[2*i-first] = 0.;
1502:         data[2*i-first+1] = 0.;
1503:       }
1504:       VecRestoreArray(quad_vecs[i],&data);
1505:     }
1506:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
          /* push the lock back, pairing with the VecLockPop() above */
1507:     VecLockPush(quad_vecs[i]);
1508:   }
1509:   return(0);
1510: }

/*
   PCBDDCComputeNoNetFlux - builds a MatNullSpace (via PCBDDCNullSpaceCreate) and fills its
   vectors with values obtained by applying the local part of divudotp to a vector of ones,
   restricted to the index sets shared with each neighbour.

   Input parameters:
     A         - matrix used to create the global vectors, to obtain the communicator and
                 the local-to-global mapping, and (when vl2l is given) the local matrix
     divudotp  - matrix whose local matrix (MatISGetLocalMat) is applied to the ones vector
     transpose - PETSC_FALSE: v = loc_divudotp^T * 1 and the row mapping of A is used;
                 PETSC_TRUE:  v = loc_divudotp * 1 and the column mapping of A is used
     vl2l      - optional index set; when non-NULL, v is scattered through it into a vector
                 created from the local matrix of A
     graph     - supplies l2gmap, count and neighbours_set

   Output parameter:
     nnsp - the created null space, or NULL when the maximum of n_neigh over the
            communicator of A is zero
*/
1512: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1513: {
1514:   Mat                    loc_divudotp;
1515:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1516:   ISLocalToGlobalMapping map;
1517:   PetscScalar            *vals;
1518:   const PetscScalar      *array;
1519:   PetscInt               i,maxneighs,maxsize;
1520:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1521:   PetscMPIInt            rank;
1522:   PetscErrorCode         ierr;

        /* the number of vectors must agree on all processes, hence the global maximum */
1525:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1526:   MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1527:   if (!maxneighs) {
1528:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1529:     *nnsp = NULL;
1530:     return(0);
1531:   }
        /* vals is a scratch buffer sized for the largest shared index set */
1532:   maxsize = 0;
1533:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1534:   PetscMalloc1(maxsize,&vals);
1535:   /* create vectors to hold quadrature weights */
1536:   MatCreateVecs(A,&quad_vec,NULL);
1537:   if (!transpose) {
1538:     MatGetLocalToGlobalMapping(A,&map,NULL);
1539:   } else {
1540:     MatGetLocalToGlobalMapping(A,NULL,&map);
1541:   }
1542:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1543:   VecDestroy(&quad_vec);
        /* the null space is created before the vectors receive their actual values */
1544:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* pop the lock (re-pushed after assembly below) and enable VecSetValuesLocal() */
1545:   for (i=0;i<maxneighs;i++) {
1546:     VecLockPop(quad_vecs[i]);
1547:     VecSetLocalToGlobalMapping(quad_vecs[i],map);
1548:   }

1550:   /* compute local quad vec */
1551:   MatISGetLocalMat(divudotp,&loc_divudotp);
1552:   if (!transpose) {
1553:     MatCreateVecs(loc_divudotp,&v,&p);
1554:   } else {
1555:     MatCreateVecs(loc_divudotp,&p,&v);
1556:   }
1557:   VecSet(p,1.);
1558:   if (!transpose) {
1559:     MatMultTranspose(loc_divudotp,p,v);
1560:   } else {
1561:     MatMult(loc_divudotp,p,v);
1562:   }
1563:   if (vl2l) {
1564:     Mat        lA;
1565:     VecScatter sc;

          /* vl2l is given as the index set on the source vector v; none is given for vins */
1567:     MatISGetLocalMat(A,&lA);
1568:     MatCreateVecs(lA,&vins,NULL);
1569:     VecScatterCreate(v,vl2l,vins,NULL,&sc);
1570:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1571:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1572:     VecScatterDestroy(&sc);
1573:   } else {
          /* vins aliases v here; it is only destroyed separately in the vl2l branch */
1574:     vins = v;
1575:   }
1576:   VecGetArrayRead(vins,&array);
1577:   VecDestroy(&p);

1579:   /* insert in global quadrature vecs */
1580:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1581:   for (i=0;i<n_neigh;i++) {
1582:     const PetscInt    *idxs;
1583:     PetscInt          idx,nn,j;

1585:     idxs = shared[i];
1586:     nn   = n_shared[i];
1587:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* NOTE(review): idxs[0] is read without checking nn > 0 - confirm every
             entry returned by ISLocalToGlobalMappingGetInfo() shares at least one index */
1588:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
          /* converts a "not found" result -(slot+1) back to slot, which selects the vector.
             NOTE(review): assumes rank is never present in neighbours_set[idxs[0]] - confirm */
1589:     idx  = -(idx+1);
1590:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1591:   }
1592:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1593:   VecRestoreArrayRead(vins,&array);
1594:   if (vl2l) {
1595:     VecDestroy(&vins);
1596:   }
1597:   VecDestroy(&v);
1598:   PetscFree(vals);

1600:   /* assemble near null space */
1601:   for (i=0;i<maxneighs;i++) {
1602:     VecAssemblyBegin(quad_vecs[i]);
1603:   }
1604:   for (i=0;i<maxneighs;i++) {
1605:     VecAssemblyEnd(quad_vecs[i]);
1606:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
          /* pairs with the VecLockPop() issued right after PCBDDCNullSpaceCreate() */
1607:     VecLockPush(quad_vecs[i]);
1608:   }
        /* NOTE(review): presumably *nnsp keeps its own references to the vectors - confirm */
1609:   VecDestroyVecs(maxneighs,&quad_vecs);
1610:   return(0);
1611: }

/*
   PCBDDCAddPrimalVerticesLocalIS - adds the indices in primalv to
   pcbddc->user_primal_vertices_local.

   Input parameters:
     pc      - the preconditioner; its data pointer is read as PC_BDDC
     primalv - index set to add; when NULL the routine does nothing

   If a set is already stored, it is replaced by the sorted, duplicate-free concatenation
   of primalv and the stored set; otherwise PCBDDCSetPrimalVerticesLocalIS() is called.
*/
1613: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1614: {
1615:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1619:   if (primalv) {
1620:     if (pcbddc->user_primal_vertices_local) {
1621:       IS list[2], newp;

1623:       list[0] = primalv;
1624:       list[1] = pcbddc->user_primal_vertices_local;
1625:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1626:       ISSortRemoveDups(newp);
            /* list[1] is the previously stored set; it is destroyed because newp replaces it */
1627:       ISDestroy(&list[1]);
1628:       pcbddc->user_primal_vertices_local = newp;
1629:     } else {
1630:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1631:     }
1632:   }
1633:   return(0);
1634: }

/*
   func_coords_private - pointwise callback that writes one coordinate of X into every
   output component.

   Input parameters:
     dim - unused
     t   - unused
     X   - coordinate array; only entry X[*comp] is read
     Nf  - number of entries of out to fill
     ctx - pointer to a PetscInt holding the coordinate index to copy

   Output parameter:
     out - out[0..Nf-1] are all set to X[*comp]

   NOTE(review): *comp is not range-checked here; presumably the caller guarantees it is
   a valid index into X - confirm.
*/
1636: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1637: {
1638:   PetscInt f, *comp  = (PetscInt *)ctx;

1641:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1642:   return(0);
1643: }

1645: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1646: {
1648:   Vec            local,global;
1649:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1650:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1651:   PetscBool      monolithic = PETSC_FALSE;

1654:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1655:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1656:   PetscOptionsEnd();
1657:   /* need to convert from global to local topology information and remove references to information in global ordering */
1658:   MatCreateVecs(pc->pmat,&global,NULL);
1659:   MatCreateVecs(matis->A,&local,NULL);
1660:   if (monolithic) { /* just get block size to properly compute vertices */
1661:     if (pcbddc->vertex_size == 1) {
1662:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1663:     }
1664:     goto boundary;
1665:   }

1667:   if (pcbddc->user_provided_isfordofs) {
1668:     if (pcbddc->n_ISForDofs) {
1669:       PetscInt i;
1670:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1671:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1672:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1673:         ISDestroy(&pcbddc->ISForDofs[i]);
1674:       }
1675:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1676:       pcbddc->n_ISForDofs = 0;
1677:       PetscFree(pcbddc->ISForDofs);
1678:     }
1679:   } else {
1680:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1681:       DM dm;

1683:       PCGetDM(pc, &dm);
1684:       if (!dm) {
1685:         MatGetDM(pc->pmat, &dm);
1686:       }
1687:       if (dm) {
1688:         IS      *fields;
1689:         PetscInt nf,i;
1690:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1691:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1692:         for (i=0;i<nf;i++) {
1693:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1694:           ISDestroy(&fields[i]);
1695:         }
1696:         PetscFree(fields);
1697:         pcbddc->n_ISForDofsLocal = nf;
1698:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1699:         PetscContainer   c;

1701:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1702:         if (c) {
1703:           MatISLocalFields lf;
1704:           PetscContainerGetPointer(c,(void**)&lf);
1705:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1706:         } else { /* fallback, create the default fields if bs > 1 */
1707:           PetscInt i, n = matis->A->rmap->n;
1708:           MatGetBlockSize(pc->pmat,&i);
1709:           if (i > 1) {
1710:             pcbddc->n_ISForDofsLocal = i;
1711:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1712:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1713:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1714:             }
1715:           }
1716:         }
1717:       }
1718:     } else {
1719:       PetscInt i;
1720:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1721:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1722:       }
1723:     }
1724:   }

1726: boundary:
1727:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1728:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1729:   } else if (pcbddc->DirichletBoundariesLocal) {
1730:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1731:   }
1732:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1733:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1734:   } else if (pcbddc->NeumannBoundariesLocal) {
1735:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1736:   }
1737:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1738:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1739:   }
1740:   VecDestroy(&global);
1741:   VecDestroy(&local);
1742:   /* detect local disconnected subdomains if requested (use matis->A) */
1743:   if (pcbddc->detect_disconnected) {
1744:     IS       primalv = NULL;
1745:     PetscInt i;

1747:     for (i=0;i<pcbddc->n_local_subs;i++) {
1748:       ISDestroy(&pcbddc->local_subs[i]);
1749:     }
1750:     PetscFree(pcbddc->local_subs);
1751:     PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1752:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1753:     ISDestroy(&primalv);
1754:   }
1755:   /* early stage corner detection */
1756:   {
1757:     DM dm;

1759:     MatGetDM(pc->pmat,&dm);
1760:     if (dm) {
1761:       PetscBool isda;

1763:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1764:       if (isda) {
1765:         ISLocalToGlobalMapping l2l;
1766:         IS                     corners;
1767:         Mat                    lA;

1769:         DMDAGetSubdomainCornersIS(dm,&corners);
1770:         MatISGetLocalMat(pc->pmat,&lA);
1771:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1772:         MatISRestoreLocalMat(pc->pmat,&lA);
1773:         if (l2l) {
1774:           const PetscInt *idx;
1775:           PetscInt       bs,*idxout,n;

1777:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1778:           ISGetLocalSize(corners,&n);
1779:           ISGetIndices(corners,&idx);
1780:           PetscMalloc1(n,&idxout);
1781:           ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1782:           ISRestoreIndices(corners,&idx);
1783:           DMDARestoreSubdomainCornersIS(dm,&corners);
1784:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1785:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1786:           ISDestroy(&corners);
1787:           pcbddc->corner_selected = PETSC_TRUE;
1788:         } else { /* not from DMDA */
1789:           DMDARestoreSubdomainCornersIS(dm,&corners);
1790:         }
1791:       }
1792:     }
1793:   }
1794:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1795:     DM dm;

1797:     PCGetDM(pc,&dm);
1798:     if (!dm) {
1799:       MatGetDM(pc->pmat,&dm);
1800:     }
1801:     if (dm) {
1802:       Vec            vcoords;
1803:       PetscSection   section;
1804:       PetscReal      *coords;
1805:       PetscInt       d,cdim,nl,nf,**ctxs;
1806:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1808:       DMGetCoordinateDim(dm,&cdim);
1809:       DMGetDefaultSection(dm,&section);
1810:       PetscSectionGetNumFields(section,&nf);
1811:       DMCreateGlobalVector(dm,&vcoords);
1812:       VecGetLocalSize(vcoords,&nl);
1813:       PetscMalloc1(nl*cdim,&coords);
1814:       PetscMalloc2(nf,&funcs,nf,&ctxs);
1815:       PetscMalloc1(nf,&ctxs[0]);
1816:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1817:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1818:       for (d=0;d<cdim;d++) {
1819:         PetscInt          i;
1820:         const PetscScalar *v;

1822:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1823:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1824:         VecGetArrayRead(vcoords,&v);
1825:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1826:         VecRestoreArrayRead(vcoords,&v);
1827:       }
1828:       VecDestroy(&vcoords);
1829:       PCSetCoordinates(pc,cdim,nl,coords);
1830:       PetscFree(coords);
1831:       PetscFree(ctxs[0]);
1832:       PetscFree2(funcs,ctxs);
1833:     }
1834:   }
1835:   return(0);
1836: }

1838: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1839: {
1840:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1841:   PetscErrorCode  ierr;
1842:   IS              nis;
1843:   const PetscInt  *idxs;
1844:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1845:   PetscBool       *ld;

1848:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1849:   MatISSetUpSF(pc->pmat);
1850:   if (mop == MPI_LAND) {
1851:     /* init rootdata with true */
1852:     ld   = (PetscBool*) matis->sf_rootdata;
1853:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1854:   } else {
1855:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1856:   }
1857:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1858:   ISGetLocalSize(*is,&nd);
1859:   ISGetIndices(*is,&idxs);
1860:   ld   = (PetscBool*) matis->sf_leafdata;
1861:   for (i=0;i<nd;i++)
1862:     if (-1 < idxs[i] && idxs[i] < n)
1863:       ld[idxs[i]] = PETSC_TRUE;
1864:   ISRestoreIndices(*is,&idxs);
1865:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1866:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1867:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1868:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1869:   if (mop == MPI_LAND) {
1870:     PetscMalloc1(nd,&nidxs);
1871:   } else {
1872:     PetscMalloc1(n,&nidxs);
1873:   }
1874:   for (i=0,nnd=0;i<n;i++)
1875:     if (ld[i])
1876:       nidxs[nnd++] = i;
1877:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1878:   ISDestroy(is);
1879:   *is  = nis;
1880:   return(0);
1881: }

1883: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1884: {
1885:   PC_IS             *pcis = (PC_IS*)(pc->data);
1886:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1887:   PetscErrorCode    ierr;

1890:   if (!pcbddc->benign_have_null) {
1891:     return(0);
1892:   }
1893:   if (pcbddc->ChangeOfBasisMatrix) {
1894:     Vec swap;

1896:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1897:     swap = pcbddc->work_change;
1898:     pcbddc->work_change = r;
1899:     r = swap;
1900:   }
1901:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1902:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1903:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1904:   VecSet(z,0.);
1905:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1906:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1907:   if (pcbddc->ChangeOfBasisMatrix) {
1908:     pcbddc->work_change = r;
1909:     VecCopy(z,pcbddc->work_change);
1910:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1911:   }
1912:   return(0);
1913: }

1915: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1916: {
1917:   PCBDDCBenignMatMult_ctx ctx;
1918:   PetscErrorCode          ierr;
1919:   PetscBool               apply_right,apply_left,reset_x;

1922:   MatShellGetContext(A,&ctx);
1923:   if (transpose) {
1924:     apply_right = ctx->apply_left;
1925:     apply_left = ctx->apply_right;
1926:   } else {
1927:     apply_right = ctx->apply_right;
1928:     apply_left = ctx->apply_left;
1929:   }
1930:   reset_x = PETSC_FALSE;
1931:   if (apply_right) {
1932:     const PetscScalar *ax;
1933:     PetscInt          nl,i;

1935:     VecGetLocalSize(x,&nl);
1936:     VecGetArrayRead(x,&ax);
1937:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1938:     VecRestoreArrayRead(x,&ax);
1939:     for (i=0;i<ctx->benign_n;i++) {
1940:       PetscScalar    sum,val;
1941:       const PetscInt *idxs;
1942:       PetscInt       nz,j;
1943:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1944:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1945:       sum = 0.;
1946:       if (ctx->apply_p0) {
1947:         val = ctx->work[idxs[nz-1]];
1948:         for (j=0;j<nz-1;j++) {
1949:           sum += ctx->work[idxs[j]];
1950:           ctx->work[idxs[j]] += val;
1951:         }
1952:       } else {
1953:         for (j=0;j<nz-1;j++) {
1954:           sum += ctx->work[idxs[j]];
1955:         }
1956:       }
1957:       ctx->work[idxs[nz-1]] -= sum;
1958:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1959:     }
1960:     VecPlaceArray(x,ctx->work);
1961:     reset_x = PETSC_TRUE;
1962:   }
1963:   if (transpose) {
1964:     MatMultTranspose(ctx->A,x,y);
1965:   } else {
1966:     MatMult(ctx->A,x,y);
1967:   }
1968:   if (reset_x) {
1969:     VecResetArray(x);
1970:   }
1971:   if (apply_left) {
1972:     PetscScalar *ay;
1973:     PetscInt    i;

1975:     VecGetArray(y,&ay);
1976:     for (i=0;i<ctx->benign_n;i++) {
1977:       PetscScalar    sum,val;
1978:       const PetscInt *idxs;
1979:       PetscInt       nz,j;
1980:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1981:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1982:       val = -ay[idxs[nz-1]];
1983:       if (ctx->apply_p0) {
1984:         sum = 0.;
1985:         for (j=0;j<nz-1;j++) {
1986:           sum += ay[idxs[j]];
1987:           ay[idxs[j]] += val;
1988:         }
1989:         ay[idxs[nz-1]] += sum;
1990:       } else {
1991:         for (j=0;j<nz-1;j++) {
1992:           ay[idxs[j]] += val;
1993:         }
1994:         ay[idxs[nz-1]] = 0.;
1995:       }
1996:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1997:     }
1998:     VecRestoreArray(y,&ay);
1999:   }
2000:   return(0);
2001: }

2003: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2004: {

2008:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2009:   return(0);
2010: }

2012: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2013: {

2017:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2018:   return(0);
2019: }

2021: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2022: {
2023:   PC_IS                   *pcis = (PC_IS*)pc->data;
2024:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2025:   PCBDDCBenignMatMult_ctx ctx;
2026:   PetscErrorCode          ierr;

2029:   if (!restore) {
2030:     Mat                A_IB,A_BI;
2031:     PetscScalar        *work;
2032:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2034:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2035:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2036:     PetscMalloc1(pcis->n,&work);
2037:     MatCreate(PETSC_COMM_SELF,&A_IB);
2038:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2039:     MatSetType(A_IB,MATSHELL);
2040:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2041:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2042:     PetscNew(&ctx);
2043:     MatShellSetContext(A_IB,ctx);
2044:     ctx->apply_left = PETSC_TRUE;
2045:     ctx->apply_right = PETSC_FALSE;
2046:     ctx->apply_p0 = PETSC_FALSE;
2047:     ctx->benign_n = pcbddc->benign_n;
2048:     if (reuse) {
2049:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2050:       ctx->free = PETSC_FALSE;
2051:     } else { /* TODO: could be optimized for successive solves */
2052:       ISLocalToGlobalMapping N_to_D;
2053:       PetscInt               i;

2055:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2056:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2057:       for (i=0;i<pcbddc->benign_n;i++) {
2058:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2059:       }
2060:       ISLocalToGlobalMappingDestroy(&N_to_D);
2061:       ctx->free = PETSC_TRUE;
2062:     }
2063:     ctx->A = pcis->A_IB;
2064:     ctx->work = work;
2065:     MatSetUp(A_IB);
2066:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2067:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2068:     pcis->A_IB = A_IB;

2070:     /* A_BI as A_IB^T */
2071:     MatCreateTranspose(A_IB,&A_BI);
2072:     pcbddc->benign_original_mat = pcis->A_BI;
2073:     pcis->A_BI = A_BI;
2074:   } else {
2075:     if (!pcbddc->benign_original_mat) {
2076:       return(0);
2077:     }
2078:     MatShellGetContext(pcis->A_IB,&ctx);
2079:     MatDestroy(&pcis->A_IB);
2080:     pcis->A_IB = ctx->A;
2081:     ctx->A = NULL;
2082:     MatDestroy(&pcis->A_BI);
2083:     pcis->A_BI = pcbddc->benign_original_mat;
2084:     pcbddc->benign_original_mat = NULL;
2085:     if (ctx->free) {
2086:       PetscInt i;
2087:       for (i=0;i<ctx->benign_n;i++) {
2088:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2089:       }
2090:       PetscFree(ctx->benign_zerodiag_subs);
2091:     }
2092:     PetscFree(ctx->work);
2093:     PetscFree(ctx);
2094:   }
2095:   return(0);
2096: }

2098: /* used just in bddc debug mode */
2099: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2100: {
2101:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2102:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2103:   Mat            An;

2107:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2108:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2109:   if (is1) {
2110:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2111:     MatDestroy(&An);
2112:   } else {
2113:     *B = An;
2114:   }
2115:   return(0);
2116: }

2118: /* TODO: add reuse flag */
2119: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2120: {
2121:   Mat            Bt;
2122:   PetscScalar    *a,*bdata;
2123:   const PetscInt *ii,*ij;
2124:   PetscInt       m,n,i,nnz,*bii,*bij;
2125:   PetscBool      flg_row;

2129:   MatGetSize(A,&n,&m);
2130:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2131:   MatSeqAIJGetArray(A,&a);
2132:   nnz = n;
2133:   for (i=0;i<ii[n];i++) {
2134:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2135:   }
2136:   PetscMalloc1(n+1,&bii);
2137:   PetscMalloc1(nnz,&bij);
2138:   PetscMalloc1(nnz,&bdata);
2139:   nnz = 0;
2140:   bii[0] = 0;
2141:   for (i=0;i<n;i++) {
2142:     PetscInt j;
2143:     for (j=ii[i];j<ii[i+1];j++) {
2144:       PetscScalar entry = a[j];
2145:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2146:         bij[nnz] = ij[j];
2147:         bdata[nnz] = entry;
2148:         nnz++;
2149:       }
2150:     }
2151:     bii[i+1] = nnz;
2152:   }
2153:   MatSeqAIJRestoreArray(A,&a);
2154:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2155:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2156:   {
2157:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2158:     b->free_a = PETSC_TRUE;
2159:     b->free_ij = PETSC_TRUE;
2160:   }
2161:   if (*B == A) {
2162:     MatDestroy(&A);
2163:   }
2164:   *B = Bt;
2165:   return(0);
2166: }

2168: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2169: {
2170:   Mat                    B = NULL;
2171:   DM                     dm;
2172:   IS                     is_dummy,*cc_n;
2173:   ISLocalToGlobalMapping l2gmap_dummy;
2174:   PCBDDCGraph            graph;
2175:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2176:   PetscInt               i,n;
2177:   PetscInt               *xadj,*adjncy;
2178:   PetscBool              isplex = PETSC_FALSE;
2179:   PetscErrorCode         ierr;

2182:   if (ncc) *ncc = 0;
2183:   if (cc) *cc = NULL;
2184:   if (primalv) *primalv = NULL;
2185:   PCBDDCGraphCreate(&graph);
2186:   PCGetDM(pc,&dm);
2187:   if (!dm) {
2188:     MatGetDM(pc->pmat,&dm);
2189:   }
2190:   if (dm) {
2191:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2192:   }
2193:   if (isplex) { /* this code has been modified from plexpartition.c */
2194:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2195:     PetscInt      *adj = NULL;
2196:     IS             cellNumbering;
2197:     const PetscInt *cellNum;
2198:     PetscBool      useCone, useClosure;
2199:     PetscSection   section;
2200:     PetscSegBuffer adjBuffer;
2201:     PetscSF        sfPoint;

2205:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2206:     DMGetPointSF(dm, &sfPoint);
2207:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2208:     /* Build adjacency graph via a section/segbuffer */
2209:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2210:     PetscSectionSetChart(section, pStart, pEnd);
2211:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2212:     /* Always use FVM adjacency to create partitioner graph */
2213:     DMPlexGetAdjacencyUseCone(dm, &useCone);
2214:     DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2215:     DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2216:     DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2217:     DMPlexGetCellNumbering(dm, &cellNumbering);
2218:     ISGetIndices(cellNumbering, &cellNum);
2219:     for (n = 0, p = pStart; p < pEnd; p++) {
2220:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2221:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2222:       adjSize = PETSC_DETERMINE;
2223:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2224:       for (a = 0; a < adjSize; ++a) {
2225:         const PetscInt point = adj[a];
2226:         if (pStart <= point && point < pEnd) {
2227:           PetscInt *PETSC_RESTRICT pBuf;
2228:           PetscSectionAddDof(section, p, 1);
2229:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2230:           *pBuf = point;
2231:         }
2232:       }
2233:       n++;
2234:     }
2235:     DMPlexSetAdjacencyUseCone(dm, useCone);
2236:     DMPlexSetAdjacencyUseClosure(dm, useClosure);
2237:     /* Derive CSR graph from section/segbuffer */
2238:     PetscSectionSetUp(section);
2239:     PetscSectionGetStorageSize(section, &size);
2240:     PetscMalloc1(n+1, &xadj);
2241:     for (idx = 0, p = pStart; p < pEnd; p++) {
2242:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2243:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2244:     }
2245:     xadj[n] = size;
2246:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2247:     /* Clean up */
2248:     PetscSegBufferDestroy(&adjBuffer);
2249:     PetscSectionDestroy(&section);
2250:     PetscFree(adj);
2251:     graph->xadj = xadj;
2252:     graph->adjncy = adjncy;
2253:   } else {
2254:     Mat       A;
2255:     PetscBool filter = PETSC_FALSE, isseqaij, flg_row;

2257:     MatISGetLocalMat(pc->pmat,&A);
2258:     if (!A->rmap->N || !A->cmap->N) {
2259:       PCBDDCGraphDestroy(&graph);
2260:       return(0);
2261:     }
2262:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2263:     if (!isseqaij && filter) {
2264:       PetscBool isseqdense;

2266:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2267:       if (!isseqdense) {
2268:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2269:       } else { /* TODO: rectangular case and LDA */
2270:         PetscScalar *array;
2271:         PetscReal   chop=1.e-6;

2273:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2274:         MatDenseGetArray(B,&array);
2275:         MatGetSize(B,&n,NULL);
2276:         for (i=0;i<n;i++) {
2277:           PetscInt j;
2278:           for (j=i+1;j<n;j++) {
2279:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2280:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2281:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2282:           }
2283:         }
2284:         MatDenseRestoreArray(B,&array);
2285:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2286:       }
2287:     } else {
2288:       PetscObjectReference((PetscObject)A);
2289:       B = A;
2290:     }
2291:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2293:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2294:     if (filter) {
2295:       PetscScalar *data;
2296:       PetscInt    j,cum;

2298:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2299:       MatSeqAIJGetArray(B,&data);
2300:       cum = 0;
2301:       for (i=0;i<n;i++) {
2302:         PetscInt t;

2304:         for (j=xadj[i];j<xadj[i+1];j++) {
2305:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2306:             continue;
2307:           }
2308:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2309:         }
2310:         t = xadj_filtered[i];
2311:         xadj_filtered[i] = cum;
2312:         cum += t;
2313:       }
2314:       MatSeqAIJRestoreArray(B,&data);
2315:       graph->xadj = xadj_filtered;
2316:       graph->adjncy = adjncy_filtered;
2317:     } else {
2318:       graph->xadj = xadj;
2319:       graph->adjncy = adjncy;
2320:     }
2321:   }
2322:   /* compute local connected components using PCBDDCGraph */
2323:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2324:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2325:   ISDestroy(&is_dummy);
2326:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2327:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2328:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2329:   PCBDDCGraphComputeConnectedComponents(graph);

2331:   /* partial clean up */
2332:   PetscFree2(xadj_filtered,adjncy_filtered);
2333:   if (B) {
2334:     PetscBool flg_row;
2335:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2336:     MatDestroy(&B);
2337:   }
2338:   if (isplex) {
2339:     PetscFree(xadj);
2340:     PetscFree(adjncy);
2341:   }

2343:   /* get back data */
2344:   if (isplex) {
2345:     if (ncc) *ncc = graph->ncc;
2346:     if (cc || primalv) {
2347:       Mat          A;
2348:       PetscBT      btv,btvt;
2349:       PetscSection subSection;
2350:       PetscInt     *ids,cum,cump,*cids,*pids;

2352:       DMPlexGetSubdomainSection(dm,&subSection);
2353:       MatISGetLocalMat(pc->pmat,&A);
2354:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2355:       PetscBTCreate(A->rmap->n,&btv);
2356:       PetscBTCreate(A->rmap->n,&btvt);

2358:       cids[0] = 0;
2359:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2360:         PetscInt j;

2362:         PetscBTMemzero(A->rmap->n,btvt);
2363:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2364:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2366:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2367:           for (k = 0; k < 2*size; k += 2) {
2368:             PetscInt s, p = closure[k], off, dof, cdof;

2370:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2371:             PetscSectionGetOffset(subSection,p,&off);
2372:             PetscSectionGetDof(subSection,p,&dof);
2373:             for (s = 0; s < dof-cdof; s++) {
2374:               if (PetscBTLookupSet(btvt,off+s)) continue;
2375:               if (!PetscBTLookup(btv,off+s)) {
2376:                 ids[cum++] = off+s;
2377:               } else { /* cross-vertex */
2378:                 pids[cump++] = off+s;
2379:               }
2380:             }
2381:           }
2382:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2383:         }
2384:         cids[i+1] = cum;
2385:         /* mark dofs as already assigned */
2386:         for (j = cids[i]; j < cids[i+1]; j++) {
2387:           PetscBTSet(btv,ids[j]);
2388:         }
2389:       }
2390:       if (cc) {
2391:         PetscMalloc1(graph->ncc,&cc_n);
2392:         for (i = 0; i < graph->ncc; i++) {
2393:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2394:         }
2395:         *cc = cc_n;
2396:       }
2397:       if (primalv) {
2398:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2399:       }
2400:       PetscFree3(ids,cids,pids);
2401:       PetscBTDestroy(&btv);
2402:       PetscBTDestroy(&btvt);
2403:     }
2404:   } else {
2405:     if (ncc) *ncc = graph->ncc;
2406:     if (cc) {
2407:       PetscMalloc1(graph->ncc,&cc_n);
2408:       for (i=0;i<graph->ncc;i++) {
2409:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2410:       }
2411:       *cc = cc_n;
2412:     }
2413:   }
2414:   /* clean up graph */
2415:   graph->xadj = 0;
2416:   graph->adjncy = 0;
2417:   PCBDDCGraphDestroy(&graph);
2418:   return(0);
2419: }

2421: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2422: {
2423:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2424:   PC_IS*         pcis = (PC_IS*)(pc->data);
2425:   IS             dirIS = NULL;
2426:   PetscInt       i;

2430:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2431:   if (zerodiag) {
2432:     Mat            A;
2433:     Vec            vec3_N;
2434:     PetscScalar    *vals;
2435:     const PetscInt *idxs;
2436:     PetscInt       nz,*count;

2438:     /* p0 */
2439:     VecSet(pcis->vec1_N,0.);
2440:     PetscMalloc1(pcis->n,&vals);
2441:     ISGetLocalSize(zerodiag,&nz);
2442:     ISGetIndices(zerodiag,&idxs);
2443:     for (i=0;i<nz;i++) vals[i] = 1.;
2444:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2445:     VecAssemblyBegin(pcis->vec1_N);
2446:     VecAssemblyEnd(pcis->vec1_N);
2447:     /* v_I */
2448:     VecSetRandom(pcis->vec2_N,NULL);
2449:     for (i=0;i<nz;i++) vals[i] = 0.;
2450:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2451:     ISRestoreIndices(zerodiag,&idxs);
2452:     ISGetIndices(pcis->is_B_local,&idxs);
2453:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2454:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2455:     ISRestoreIndices(pcis->is_B_local,&idxs);
2456:     if (dirIS) {
2457:       PetscInt n;

2459:       ISGetLocalSize(dirIS,&n);
2460:       ISGetIndices(dirIS,&idxs);
2461:       for (i=0;i<n;i++) vals[i] = 0.;
2462:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2463:       ISRestoreIndices(dirIS,&idxs);
2464:     }
2465:     VecAssemblyBegin(pcis->vec2_N);
2466:     VecAssemblyEnd(pcis->vec2_N);
2467:     VecDuplicate(pcis->vec1_N,&vec3_N);
2468:     VecSet(vec3_N,0.);
2469:     MatISGetLocalMat(pc->pmat,&A);
2470:     MatMult(A,pcis->vec1_N,vec3_N);
2471:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2472:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2473:     PetscFree(vals);
2474:     VecDestroy(&vec3_N);

2476:     /* there should not be any pressure dofs lying on the interface */
2477:     PetscCalloc1(pcis->n,&count);
2478:     ISGetIndices(pcis->is_B_local,&idxs);
2479:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2480:     ISRestoreIndices(pcis->is_B_local,&idxs);
2481:     ISGetIndices(zerodiag,&idxs);
2482:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]);
2483:     ISRestoreIndices(zerodiag,&idxs);
2484:     PetscFree(count);
2485:   }
2486:   ISDestroy(&dirIS);

2488:   /* check PCBDDCBenignGetOrSetP0 */
2489:   VecSetRandom(pcis->vec1_global,NULL);
2490:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2491:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2492:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2493:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2494:   for (i=0;i<pcbddc->benign_n;i++) {
2495:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2496:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2497:   }
2498:   return(0);
2499: }

/*
   PCBDDCBenignDetectSaddlePoint - Looks for local zero-diagonal ("pressure") dofs of pcbddc->local_mat and
   sets up the data used by the benign trick.

   Input Parameter:
.  pc - the preconditioner; its PC_BDDC data is read from pc->data

   Output Parameter:
.  zerodiaglocal - the IS with the zero-diagonal dofs finally retained (NULL if it has been destroyed
                   because there are no zero-diagonal dofs or no subdomain passed the checks)

   Notes:
   On exit the following PC_BDDC fields have been (re)set: benign_n, benign_zerodiag_subs, benign_null,
   benign_have_null and, only when zero-diagonal dofs have been retained, benign_change, benign_p0_lidx,
   benign_p0_gidx and benign_p0 (allocated, not filled here). If compute_nonetflux is set and divudotp
   is not yet available, divudotp is created too.
   benign_null is the logical AND and benign_have_null the logical OR, over the communicator of pc,
   of the local flag have_null.
*/
2501: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2502: {
2503:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2504:   IS             pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2505:   PetscInt       nz,n;
2506:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2507:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

        /* discard the benign data stored in pcbddc before recomputing it */
2511:   PetscSFDestroy(&pcbddc->benign_sf);
2512:   MatDestroy(&pcbddc->benign_B0);
2513:   for (n=0;n<pcbddc->benign_n;n++) {
2514:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2515:   }
2516:   PetscFree(pcbddc->benign_zerodiag_subs);
2517:   pcbddc->benign_n = 0;

2519:   /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2520:      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2521:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2522:      If not, a change of basis on pressures is not needed
2523:      since the local Schur complements are already SPD
2524:   */
2525:   has_null_pressures = PETSC_TRUE;
2526:   have_null = PETSC_TRUE;
2527:   if (pcbddc->n_ISForDofsLocal) {
2528:     IS       iP = NULL;
2529:     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;

2531:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2532:     PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2533:     PetscOptionsEnd();
2534:     if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2535:     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2536:     ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2537:     ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2538:     ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2539:     ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2540:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2541:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2542:     if (iP) {
2543:       IS newpressures;

2545:       ISDifference(pressures,iP,&newpressures);
2546:       ISDestroy(&pressures);
2547:       pressures = newpressures;
2548:     }
          /* pressures is kept sorted; it is compared with the (sorted) zero-diagonal sets below */
2549:     ISSorted(pressures,&sorted);
2550:     if (!sorted) {
2551:       ISSort(pressures);
2552:     }
2553:   } else {
2554:     pressures = NULL;
2555:   }
2556:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2557:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
        /* an empty local problem forces the explicit change of basis */
2558:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2559:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2560:   ISSorted(zerodiag,&sorted);
2561:   if (!sorted) {
2562:     ISSort(zerodiag);
2563:   }
        /* keep a second reference to the full set of zero-diagonal dofs: zerodiag can be destroyed or
           replaced below, while zerodiag_save is still needed when preparing the no-net-flux matrix */
2564:   PetscObjectReference((PetscObject)zerodiag);
2565:   zerodiag_save = zerodiag;
2566:   ISGetLocalSize(zerodiag,&nz);
2567:   if (!nz) {
          /* no zero-diagonal dofs: nothing to change; a non-empty subdomain also clears have_null */
2568:     if (n) have_null = PETSC_FALSE;
2569:     has_null_pressures = PETSC_FALSE;
2570:     ISDestroy(&zerodiag);
2571:   }
2572:   recompute_zerodiag = PETSC_FALSE;
2573:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2574:   zerodiag_subs    = NULL;
2575:   pcbddc->benign_n = 0;
2576:   n_interior_dofs  = 0;
2577:   interior_dofs    = NULL;
2578:   nneu             = 0;
2579:   if (pcbddc->NeumannBoundariesLocal) {
2580:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2581:   }
        /* the check on B(v_I,p0) is performed when no Neumann boundaries are given or when not on the finest level */
2582:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2583:   if (checkb) { /* need to compute interior nodes */
2584:     PetscInt n,i,j;
2585:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2586:     PetscInt *iwork;

          /* a dof is interior if it does not appear in any shared list; entry 0 of the neighbour info is
             skipped (presumably it refers to the process itself -- confirm against ISLocalToGlobalMappingGetInfo) */
2588:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2589:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2590:     PetscCalloc1(n,&iwork);
2591:     PetscMalloc1(n,&interior_dofs);
2592:     for (i=1;i<n_neigh;i++)
2593:       for (j=0;j<n_shared[i];j++)
2594:           iwork[shared[i][j]] += 1;
2595:     for (i=0;i<n;i++)
2596:       if (!iwork[i])
2597:         interior_dofs[n_interior_dofs++] = i;
2598:     PetscFree(iwork);
2599:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2600:   }
2601:   if (has_null_pressures) {
2602:     IS             *subs;
2603:     PetscInt       nsubs,i,j,nl;
2604:     const PetscInt *idxs;
2605:     PetscScalar    *array;
2606:     Vec            *work;
2607:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2609:     subs  = pcbddc->local_subs;
2610:     nsubs = pcbddc->n_local_subs;
2611:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2612:     if (checkb) {
2613:       VecDuplicateVecs(matis->y,2,&work);
2614:       ISGetLocalSize(zerodiag,&nl);
2615:       ISGetIndices(zerodiag,&idxs);
2616:       /* work[0] = 1_p (ones on the zero-diagonal dofs, zero elsewhere) */
2617:       VecSet(work[0],0.);
2618:       VecGetArray(work[0],&array);
2619:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2620:       VecRestoreArray(work[0],&array);
2621:       /* work[1] = 1_v (ones everywhere except on the zero-diagonal dofs) */
2622:       VecSet(work[1],1.);
2623:       VecGetArray(work[1],&array);
2624:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2625:       VecRestoreArray(work[1],&array);
2626:       ISRestoreIndices(zerodiag,&idxs);
2627:     }
2628:     if (nsubs > 1) {
2629:       PetscCalloc1(nsubs,&zerodiag_subs);
2630:       for (i=0;i<nsubs;i++) {
2631:         ISLocalToGlobalMapping l2g;
2632:         IS                     t_zerodiag_subs;
2633:         PetscInt               nl;

              /* zero-diagonal dofs of the i-th local subdomain, in the numbering of subs[i] */
2635:         ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2636:         ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2637:         ISGetLocalSize(t_zerodiag_subs,&nl);
2638:         if (nl) {
2639:           PetscBool valid = PETSC_TRUE;

2641:           if (checkb) {
                  /* x = indicator of the zero-diagonal dofs of subs[i]; y = A x restricted to the
                     non zero-diagonal dofs; the subdomain is rejected if y is nonzero on an interior dof */
2642:             VecSet(matis->x,0);
2643:             ISGetLocalSize(subs[i],&nl);
2644:             ISGetIndices(subs[i],&idxs);
2645:             VecGetArray(matis->x,&array);
2646:             for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2647:             VecRestoreArray(matis->x,&array);
2648:             ISRestoreIndices(subs[i],&idxs);
2649:             VecPointwiseMult(matis->x,work[0],matis->x);
2650:             MatMult(matis->A,matis->x,matis->y);
2651:             VecPointwiseMult(matis->y,work[1],matis->y);
2652:             VecGetArray(matis->y,&array);
2653:             for (j=0;j<n_interior_dofs;j++) {
2654:               if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2655:                 valid = PETSC_FALSE;
2656:                 break;
2657:               }
2658:             }
2659:             VecRestoreArray(matis->y,&array);
2660:           }
2661:           if (valid && nneu) {
2662:             const PetscInt *idxs;
2663:             PetscInt       nzb;

                  /* count the Neumann dofs belonging to subs[i]: any of them invalidates the subdomain */
2665:             ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2666:             ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2667:             ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2668:             if (nzb) valid = PETSC_FALSE;
2669:           }
2670:           if (valid && pressures) {
2671:             IS t_pressure_subs;
                  /* the zero-diagonal dofs of subs[i] must coincide with its pressure dofs */
2672:             ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2673:             ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2674:             ISDestroy(&t_pressure_subs);
2675:           }
2676:           if (valid) {
                  /* store the accepted set, mapped back through l2g */
2677:             ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2678:             pcbddc->benign_n++;
2679:           } else {
                  /* a subdomain has been rejected: zerodiag must be rebuilt from the accepted ones */
2680:             recompute_zerodiag = PETSC_TRUE;
2681:           }
2682:         }
2683:         ISDestroy(&t_zerodiag_subs);
2684:         ISLocalToGlobalMappingDestroy(&l2g);
2685:       }
2686:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2687:       PetscBool valid = PETSC_TRUE;

2689:       if (nneu) valid = PETSC_FALSE;
2690:       if (valid && pressures) {
2691:         ISEqual(pressures,zerodiag,&valid);
2692:       }
2693:       if (valid && checkb) {
              /* same check as in the multiple subdomains case, using all the zero-diagonal dofs */
2694:         MatMult(matis->A,work[0],matis->x);
2695:         VecPointwiseMult(matis->x,work[1],matis->x);
2696:         VecGetArray(matis->x,&array);
2697:         for (j=0;j<n_interior_dofs;j++) {
2698:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2699:             valid = PETSC_FALSE;
2700:             break;
2701:           }
2702:         }
2703:         VecRestoreArray(matis->x,&array);
2704:       }
2705:       if (valid) {
              /* zerodiag_subs[0] shares zerodiag (extra reference taken) */
2706:         pcbddc->benign_n = 1;
2707:         PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2708:         PetscObjectReference((PetscObject)zerodiag);
2709:         zerodiag_subs[0] = zerodiag;
2710:       }
2711:     }
2712:     if (checkb) {
2713:       VecDestroyVecs(2,&work);
2714:     }
2715:   }
2716:   PetscFree(interior_dofs);

        /* no subdomain accepted: drop zerodiag; for a non-empty local problem no change of basis is built */
2718:   if (!pcbddc->benign_n) {
2719:     PetscInt n;

2721:     ISDestroy(&zerodiag);
2722:     recompute_zerodiag = PETSC_FALSE;
2723:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2724:     if (n) {
2725:       has_null_pressures = PETSC_FALSE;
2726:       have_null = PETSC_FALSE;
2727:     }
2728:   }

2730:   /* final check for null pressures */
2731:   if (zerodiag && pressures) {
2732:     PetscInt nz,np;
2733:     ISGetLocalSize(zerodiag,&nz);
2734:     ISGetLocalSize(pressures,&np);
2735:     if (nz != np) have_null = PETSC_FALSE;
2736:   }

        /* rebuild zerodiag as the sorted union of the accepted subdomain sets */
2738:   if (recompute_zerodiag) {
2739:     ISDestroy(&zerodiag);
2740:     if (pcbddc->benign_n == 1) {
2741:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2742:       zerodiag = zerodiag_subs[0];
2743:     } else {
2744:       PetscInt i,nzn,*new_idxs;

            /* first pass: total size; second pass: concatenate the indices */
2746:       nzn = 0;
2747:       for (i=0;i<pcbddc->benign_n;i++) {
2748:         PetscInt ns;
2749:         ISGetLocalSize(zerodiag_subs[i],&ns);
2750:         nzn += ns;
2751:       }
2752:       PetscMalloc1(nzn,&new_idxs);
2753:       nzn = 0;
2754:       for (i=0;i<pcbddc->benign_n;i++) {
2755:         PetscInt ns,*idxs;
2756:         ISGetLocalSize(zerodiag_subs[i],&ns);
2757:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2758:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2759:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2760:         nzn += ns;
2761:       }
2762:       PetscSortInt(nzn,new_idxs);
            /* new_idxs is handed over to the IS (PETSC_OWN_POINTER): do not free it here */
2763:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2764:     }
2765:     have_null = PETSC_FALSE;
2766:   }

2768:   /* Prepare matrix to compute no-net-flux */
2769:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2770:     Mat                    A,loc_divudotp;
2771:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2772:     IS                     row,col,isused = NULL;
2773:     PetscInt               M,N,n,st,n_isused;

          /* rows are taken from the pressure field if available, otherwise from all the zero-diagonal dofs */
2775:     if (pressures) {
2776:       isused = pressures;
2777:     } else {
2778:       isused = zerodiag_save;
2779:     }
2780:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2781:     MatISGetLocalMat(pc->pmat,&A);
2782:     MatGetLocalSize(A,&n,NULL);
2783:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2784:     n_isused = 0;
2785:     if (isused) {
2786:       ISGetLocalSize(isused,&n_isused);
2787:     }
          /* st = sum of n_isused over the lower ranks (inclusive scan minus the local contribution):
             first global row index of this process */
2788:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2789:     st = st-n_isused;
2790:     if (n) {
2791:       const PetscInt *gidxs;

2793:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2794:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2795:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2796:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2797:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2798:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2799:     } else {
            /* empty local problem: empty local matrix and empty column map */
2800:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2801:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2802:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2803:     }
          /* divudotp is a MATIS of global size M x N: M is the global size of row, N the number of columns of pc->pmat */
2804:     MatGetSize(pc->pmat,NULL,&N);
2805:     ISGetSize(row,&M);
2806:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2807:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2808:     ISDestroy(&row);
2809:     ISDestroy(&col);
2810:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2811:     MatSetType(pcbddc->divudotp,MATIS);
2812:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2813:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2814:     ISLocalToGlobalMappingDestroy(&rl2g);
2815:     ISLocalToGlobalMappingDestroy(&cl2g);
2816:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2817:     MatDestroy(&loc_divudotp);
2818:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2819:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2820:   }
2821:   ISDestroy(&zerodiag_save);

2823:   /* change of basis and p0 dofs */
2824:   if (has_null_pressures) {
2825:     IS             zerodiagc;
2826:     const PetscInt *idxs,*idxsc;
2827:     PetscInt       i,s,*nnz;

          /* zerodiagc: the local dofs in [0,n) that are not in zerodiag */
2829:     ISGetLocalSize(zerodiag,&nz);
2830:     ISComplement(zerodiag,0,n,&zerodiagc);
2831:     ISGetIndices(zerodiagc,&idxsc);
2832:     /* local change of basis for pressures */
2833:     MatDestroy(&pcbddc->benign_change);
2834:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2835:     MatSetType(pcbddc->benign_change,MATAIJ);
2836:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2837:     PetscMalloc1(n,&nnz);
2838:     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2839:     for (i=0;i<pcbddc->benign_n;i++) {
2840:       PetscInt nzs,j;

2842:       ISGetLocalSize(zerodiag_subs[i],&nzs);
2843:       ISGetIndices(zerodiag_subs[i],&idxs);
2844:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2845:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2846:       ISRestoreIndices(zerodiag_subs[i],&idxs);
2847:     }
2848:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2849:     PetscFree(nnz);
2850:     /* set identity on velocities */
2851:     for (i=0;i<n-nz;i++) {
2852:       MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2853:     }
2854:     ISRestoreIndices(zerodiagc,&idxsc);
2855:     ISDestroy(&zerodiagc);
          /* (re)allocate the p0 arrays, one entry per accepted subdomain */
2856:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2857:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2858:     /* set change on pressures */
2859:     for (s=0;s<pcbddc->benign_n;s++) {
2860:       PetscScalar *array;
2861:       PetscInt    nzs;

2863:       ISGetLocalSize(zerodiag_subs[s],&nzs);
2864:       ISGetIndices(zerodiag_subs[s],&idxs);
            /* row of each pressure dof but the last one: 1 on the diagonal and 1 in the column of the last dof */
2865:       for (i=0;i<nzs-1;i++) {
2866:         PetscScalar vals[2];
2867:         PetscInt    cols[2];

2869:         cols[0] = idxs[i];
2870:         cols[1] = idxs[nzs-1];
2871:         vals[0] = 1.;
2872:         vals[1] = 1.;
2873:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2874:       }
            /* row of the last pressure dof: -1 on the other pressure dofs of the subdomain, 1 on the diagonal */
2875:       PetscMalloc1(nzs,&array);
2876:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2877:       array[nzs-1] = 1.;
2878:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2879:       /* store local idxs for p0 */
2880:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2881:       ISRestoreIndices(zerodiag_subs[s],&idxs);
2882:       PetscFree(array);
2883:     }
2884:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2885:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2886:     /* project if needed */
2887:     if (pcbddc->benign_change_explicit) {
2888:       Mat M;

            /* replace local_mat by the result of MatPtAP with benign_change, passed through MatSeqAIJCompress */
2890:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2891:       MatDestroy(&pcbddc->local_mat);
2892:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2893:       MatDestroy(&M);
2894:     }
2895:     /* store global idxs for p0 */
2896:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2897:   }
        /* pcbddc takes over the array of subdomain sets (NULL if none has been created) */
2898:   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2899:   ISDestroy(&pressures);

2901:   /* determines if the coarse solver will be singular or not */
2902:   MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2903:   /* determines if the problem has subdomains with 0 pressure block */
2904:   MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
        /* the reference held on zerodiag (if any) is passed to the caller */
2905:   *zerodiaglocal = zerodiag;
2906:   return(0);
2907: }

/*
   PCBDDCBenignGetOrSetP0 - Transfers the p0 values between a vector and pcbddc->benign_p0.

   Input Parameters:
+  pc  - the preconditioner; its PC_BDDC data is read from pc->data
.  v   - the vector whose array is read (get) or written (set)
-  get - if PETSC_TRUE, the entries of v are broadcast into pcbddc->benign_p0;
         if PETSC_FALSE, pcbddc->benign_p0 is reduced into v with MPIU_REPLACE

   Notes:
   The star forest pcbddc->benign_sf is created on the first call, using the row layout of pc->pmat
   and the pcbddc->benign_n global indices stored in pcbddc->benign_p0_gidx.
   v is presumably laid out as the rows of pc->pmat -- confirm against the callers.
*/
2909: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2910: {
2911:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2912:   PetscScalar    *array;

        /* lazily build the star forest connecting the local p0 entries to their global locations */
2916:   if (!pcbddc->benign_sf) {
2917:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2918:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2919:   }
2920:   if (get) {
          /* v -> benign_p0: v is only read */
2921:     VecGetArrayRead(v,(const PetscScalar**)&array);
2922:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2923:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2924:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2925:   } else {
          /* benign_p0 -> v: the addressed entries of v are overwritten */
2926:     VecGetArray(v,&array);
2927:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2928:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2929:     VecRestoreArray(v,&array);
2930:   }
2931:   return(0);
2932: }

/*
   PCBDDCBenignPopOrPushB0 - Extracts (pop) the rows associated with the p0 dofs into pcbddc->benign_B0,
   or reinserts them (push) into pcbddc->local_mat.

   Input Parameters:
+  pc  - the preconditioner; its PC_BDDC data is read from pc->data
-  pop - PETSC_TRUE to pop, PETSC_FALSE to push

   Notes:
   Does nothing if pcbddc->benign_n is zero.
   Pop with benign_change_explicit: benign_B0 is the submatrix of local_mat made of the rows
   benign_p0_lidx; those rows and columns of local_mat are then zeroed, with 1.0 on the diagonal.
   Pop without benign_change_explicit: row i of benign_B0 is the local operator (Mat_IS A of pc->pmat)
   applied to the indicator of benign_zerodiag_subs[i], keeping entries larger than PETSC_SMALL in
   absolute value; local_mat is not modified.
   Push is supported only with benign_change_explicit; otherwise an error is raised.
*/
2934: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2935: {
2936:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

2940:   /* TODO: add error checking
2941:     - avoid nested pop (or push) calls.
2942:     - cannot push before pop.
2943:     - cannot call this if pcbddc->local_mat is NULL
2944:   */
2945:   if (!pcbddc->benign_n) {
2946:     return(0);
2947:   }
2948:   if (pop) {
2949:     if (pcbddc->benign_change_explicit) {
2950:       IS       is_p0;
2951:       MatReuse reuse;

2953:       /* extract B_0 */
2954:       reuse = MAT_INITIAL_MATRIX;
2955:       if (pcbddc->benign_B0) {
2956:         reuse = MAT_REUSE_MATRIX;
2957:       }
2958:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2959:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2960:       /* remove rows and cols from local problem */
            /* the nonzero pattern is kept and new locations are allowed, so that a later push can reinsert the values */
2961:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2962:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2963:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2964:       ISDestroy(&is_p0);
2965:     } else {
2966:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
2967:       PetscScalar *vals;
2968:       PetscInt    i,n,*idxs_ins;

            /* idxs_ins and vals are work arrays of the local size, reused for every row of B0 */
2970:       VecGetLocalSize(matis->y,&n);
2971:       PetscMalloc2(n,&idxs_ins,n,&vals);
2972:       if (!pcbddc->benign_B0) {
2973:         PetscInt *nnz;
2974:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2975:         MatSetType(pcbddc->benign_B0,MATAIJ);
2976:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2977:         PetscMalloc1(pcbddc->benign_n,&nnz);
              /* preallocate n minus the number of zero-diagonal dofs per row (presumably an upper bound,
                 assuming the row has no entries on the zero-diagonal dofs themselves -- TODO confirm) */
2978:         for (i=0;i<pcbddc->benign_n;i++) {
2979:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2980:           nnz[i] = n - nnz[i];
2981:         }
2982:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2983:         PetscFree(nnz);
2984:       }

2986:       for (i=0;i<pcbddc->benign_n;i++) {
2987:         PetscScalar *array;
2988:         PetscInt    *idxs,j,nz,cum;

              /* x = indicator of the i-th set of zero-diagonal dofs, y = A x */
2990:         VecSet(matis->x,0.);
2991:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2992:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2993:         for (j=0;j<nz;j++) vals[j] = 1.;
2994:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2995:         VecAssemblyBegin(matis->x);
2996:         VecAssemblyEnd(matis->x);
2997:         VecSet(matis->y,0.);
2998:         MatMult(matis->A,matis->x,matis->y);
2999:         VecGetArray(matis->y,&array);
              /* compress the entries of y above PETSC_SMALL into (idxs_ins,vals) and insert them as row i of B0 */
3000:         cum = 0;
3001:         for (j=0;j<n;j++) {
3002:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3003:             vals[cum] = array[j];
3004:             idxs_ins[cum] = j;
3005:             cum++;
3006:           }
3007:         }
3008:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3009:         VecRestoreArray(matis->y,&array);
3010:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3011:       }
3012:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3013:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3014:       PetscFree2(idxs_ins,vals);
3015:     }
3016:   } else { /* push */
3017:     if (pcbddc->benign_change_explicit) {
3018:       PetscInt i;

3020:       for (i=0;i<pcbddc->benign_n;i++) {
3021:         PetscScalar *B0_vals;
3022:         PetscInt    *B0_cols,B0_ncol;

              /* put row i of B0 back as both the row and the column of the i-th p0 dof, then reset its diagonal to zero */
3024:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3025:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3026:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3027:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3028:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3029:       }
3030:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3031:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3032:     } else {
            /* without the explicit change of basis local_mat was never modified by pop, so push is an error */
3033:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
3034:     }
3035:   }
3036:   return(0);
3037: }

3039: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3040: {
3041:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3042:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3043:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3044:   PetscBLASInt    *B_iwork,*B_ifail;
3045:   PetscScalar     *work,lwork;
3046:   PetscScalar     *St,*S,*eigv;
3047:   PetscScalar     *Sarray,*Starray;
3048:   PetscReal       *eigs,thresh,lthresh,uthresh;
3049:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3050:   PetscBool       allocated_S_St;
3051: #if defined(PETSC_USE_COMPLEX)
3052:   PetscReal       *rwork;
3053: #endif
3054:   PetscErrorCode  ierr;

3057:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3058:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3059:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);

3061:   if (pcbddc->dbg_flag) {
3062:     PetscViewerFlush(pcbddc->dbg_viewer);
3063:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3064:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3065:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3066:   }

3068:   if (pcbddc->dbg_flag) {
3069:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3070:   }

3072:   /* max size of subsets */
3073:   mss = 0;
3074:   for (i=0;i<sub_schurs->n_subs;i++) {
3075:     PetscInt subset_size;

3077:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3078:     mss = PetscMax(mss,subset_size);
3079:   }

3081:   /* min/max and threshold */
3082:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3083:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3084:   nmax = PetscMax(nmin,nmax);
3085:   allocated_S_St = PETSC_FALSE;
3086:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3087:     allocated_S_St = PETSC_TRUE;
3088:   }

3090:   /* allocate lapack workspace */
3091:   cum = cum2 = 0;
3092:   maxneigs = 0;
3093:   for (i=0;i<sub_schurs->n_subs;i++) {
3094:     PetscInt n,subset_size;

3096:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3097:     n = PetscMin(subset_size,nmax);
3098:     cum += subset_size;
3099:     cum2 += subset_size*n;
3100:     maxneigs = PetscMax(maxneigs,n);
3101:   }
3102:   if (mss) {
3103:     if (sub_schurs->is_symmetric) {
3104:       PetscBLASInt B_itype = 1;
3105:       PetscBLASInt B_N = mss;
3106:       PetscReal    zero = 0.0;
3107:       PetscReal    eps = 0.0; /* dlamch? */

3109:       B_lwork = -1;
3110:       S = NULL;
3111:       St = NULL;
3112:       eigs = NULL;
3113:       eigv = NULL;
3114:       B_iwork = NULL;
3115:       B_ifail = NULL;
3116: #if defined(PETSC_USE_COMPLEX)
3117:       rwork = NULL;
3118: #endif
3119:       thresh = 1.0;
3120:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3121: #if defined(PETSC_USE_COMPLEX)
3122:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3123: #else
3124:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3125: #endif
3126:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3127:       PetscFPTrapPop();
3128:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3129:   } else {
3130:     lwork = 0;
3131:   }

3133:   nv = 0;
3134:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3135:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3136:   }
3137:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3138:   if (allocated_S_St) {
3139:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3140:   }
3141:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3142: #if defined(PETSC_USE_COMPLEX)
3143:   PetscMalloc1(7*mss,&rwork);
3144: #endif
3145:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3146:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3147:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3148:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3149:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3150:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3152:   maxneigs = 0;
3153:   cum = cumarray = 0;
3154:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3155:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3156:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3157:     const PetscInt *idxs;

3159:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3160:     for (cum=0;cum<nv;cum++) {
3161:       pcbddc->adaptive_constraints_n[cum] = 1;
3162:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3163:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3164:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3165:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3166:     }
3167:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3168:   }

3170:   if (mss) { /* multilevel */
3171:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3172:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3173:   }

3175:   lthresh = pcbddc->adaptive_threshold[0];
3176:   uthresh = pcbddc->adaptive_threshold[1];
3177:   for (i=0;i<sub_schurs->n_subs;i++) {
3178:     const PetscInt *idxs;
3179:     PetscReal      upper,lower;
3180:     PetscInt       j,subset_size,eigs_start = 0;
3181:     PetscBLASInt   B_N;
3182:     PetscBool      same_data = PETSC_FALSE;
3183:     PetscBool      scal = PETSC_FALSE;

3185:     if (pcbddc->use_deluxe_scaling) {
3186:       upper = PETSC_MAX_REAL;
3187:       lower = uthresh;
3188:     } else {
3189:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3190:       upper = 1./uthresh;
3191:       lower = 0.;
3192:     }
3193:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3194:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3195:     PetscBLASIntCast(subset_size,&B_N);
3196:     /* this is experimental: we assume the dofs have been properly grouped to have
3197:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3198:     if (!sub_schurs->is_posdef) {
3199:       Mat T;

3201:       for (j=0;j<subset_size;j++) {
3202:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3203:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3204:           MatScale(T,-1.0);
3205:           MatDestroy(&T);
3206:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3207:           MatScale(T,-1.0);
3208:           MatDestroy(&T);
3209:           if (sub_schurs->change_primal_sub) {
3210:             PetscInt       nz,k;
3211:             const PetscInt *idxs;

3213:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3214:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3215:             for (k=0;k<nz;k++) {
3216:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3217:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3218:             }
3219:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3220:           }
3221:           scal = PETSC_TRUE;
3222:           break;
3223:         }
3224:       }
3225:     }

3227:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3228:       if (sub_schurs->is_symmetric) {
3229:         PetscInt j,k;
3230:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3231:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3232:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3233:         }
3234:         for (j=0;j<subset_size;j++) {
3235:           for (k=j;k<subset_size;k++) {
3236:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3237:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3238:           }
3239:         }
3240:       } else {
3241:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3242:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3243:       }
3244:     } else {
3245:       S = Sarray + cumarray;
3246:       St = Starray + cumarray;
3247:     }
3248:     /* see if we can save some work */
3249:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3250:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3251:     }

3253:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3254:       B_neigs = 0;
3255:     } else {
3256:       if (sub_schurs->is_symmetric) {
3257:         PetscBLASInt B_itype = 1;
3258:         PetscBLASInt B_IL, B_IU;
3259:         PetscReal    eps = -1.0; /* dlamch? */
3260:         PetscInt     nmin_s;
3261:         PetscBool    compute_range;

3263:         B_neigs = 0;
3264:         compute_range = (PetscBool)!same_data;
3265:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3267:         if (pcbddc->dbg_flag) {
3268:           PetscInt nc = 0;

3270:           if (sub_schurs->change_primal_sub) {
3271:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3272:           }
3273:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3274:         }

3276:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3277:         if (compute_range) {

3279:           /* ask for eigenvalues larger than thresh */
3280:           if (sub_schurs->is_posdef) {
3281: #if defined(PETSC_USE_COMPLEX)
3282:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3283: #else
3284:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3285: #endif
3286:           } else { /* no theory so far, but it works nicely */
3287:             PetscInt  recipe = 0,recipe_m = 1;
3288:             PetscReal bb[2];

3290:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3291:             switch (recipe) {
3292:             case 0:
3293:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3294:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3295: #if defined(PETSC_USE_COMPLEX)
3296:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3297: #else
3298:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3299: #endif
3300:               break;
3301:             case 1:
3302:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3303: #if defined(PETSC_USE_COMPLEX)
3304:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3305: #else
3306:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3307: #endif
3308:               if (!scal) {
3309:                 PetscBLASInt B_neigs2 = 0;

3311:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3312:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3313:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3314: #if defined(PETSC_USE_COMPLEX)
3315:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3316: #else
3317:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3318: #endif
3319:                 B_neigs += B_neigs2;
3320:               }
3321:               break;
3322:             case 2:
3323:               if (scal) {
3324:                 bb[0] = PETSC_MIN_REAL;
3325:                 bb[1] = 0;
3326: #if defined(PETSC_USE_COMPLEX)
3327:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3328: #else
3329:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3330: #endif
3331:               } else {
3332:                 PetscBLASInt B_neigs2 = 0;
3333:                 PetscBool    import = PETSC_FALSE;

3335:                 lthresh = PetscMax(lthresh,0.0);
3336:                 if (lthresh > 0.0) {
3337:                   bb[0] = PETSC_MIN_REAL;
3338:                   bb[1] = lthresh*lthresh;

3340:                   import = PETSC_TRUE;
3341: #if defined(PETSC_USE_COMPLEX)
3342:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3343: #else
3344:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3345: #endif
3346:                 }
3347:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3348:                 bb[1] = PETSC_MAX_REAL;
3349:                 if (import) {
3350:                   PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3351:                   PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3352:                 }
3353: #if defined(PETSC_USE_COMPLEX)
3354:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3355: #else
3356:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3357: #endif
3358:                 B_neigs += B_neigs2;
3359:               }
3360:               break;
3361:             case 3:
3362:               if (scal) {
3363:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3364:               } else {
3365:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3366:               }
3367:               if (!scal) {
3368:                 bb[0] = uthresh;
3369:                 bb[1] = PETSC_MAX_REAL;
3370: #if defined(PETSC_USE_COMPLEX)
3371:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3372: #else
3373:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3374: #endif
3375:               }
3376:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3377:                 PetscBLASInt B_neigs2 = 0;

3379:                 B_IL = 1;
3380:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3381:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3382:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3383: #if defined(PETSC_USE_COMPLEX)
3384:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3385: #else
3386:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3387: #endif
3388:                 B_neigs += B_neigs2;
3389:               }
3390:               break;
3391:             case 4:
3392:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3393: #if defined(PETSC_USE_COMPLEX)
3394:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3395: #else
3396:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3397: #endif
3398:               {
3399:                 PetscBLASInt B_neigs2 = 0;

3401:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3402:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3403:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3404: #if defined(PETSC_USE_COMPLEX)
3405:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3406: #else
3407:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3408: #endif
3409:                 B_neigs += B_neigs2;
3410:               }
3411:               break;
3412:             case 5: /* same as before: first compute all eigenvalues, then filter */
3413: #if defined(PETSC_USE_COMPLEX)
3414:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3415: #else
3416:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3417: #endif
3418:               {
3419:                 PetscInt e,k,ne;
3420:                 for (e=0,ne=0;e<B_neigs;e++) {
3421:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3422:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3423:                     eigs[ne] = eigs[e];
3424:                     ne++;
3425:                   }
3426:                 }
3427:                 PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3428:                 B_neigs = ne;
3429:               }
3430:               break;
3431:             default:
3432:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3433:               break;
3434:             }
3435:           }
3436:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3437:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3438:           B_IL = 1;
3439: #if defined(PETSC_USE_COMPLEX)
3440:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3441: #else
3442:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3443: #endif
3444:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3445:           PetscInt k;
3446:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3447:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3448:           PetscBLASIntCast(nmax,&B_neigs);
3449:           nmin = nmax;
3450:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3451:           for (k=0;k<nmax;k++) {
3452:             eigs[k] = 1./PETSC_SMALL;
3453:             eigv[k*(subset_size+1)] = 1.0;
3454:           }
3455:         }
3456:         PetscFPTrapPop();
3457:         if (B_ierr) {
3458:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3459:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3460:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3461:         }

3463:         if (B_neigs > nmax) {
3464:           if (pcbddc->dbg_flag) {
3465:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3466:           }
3467:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3468:           B_neigs = nmax;
3469:         }

3471:         nmin_s = PetscMin(nmin,B_N);
3472:         if (B_neigs < nmin_s) {
3473:           PetscBLASInt B_neigs2 = 0;

3475:           if (pcbddc->use_deluxe_scaling) {
3476:             if (scal) {
3477:               B_IU = nmin_s;
3478:               B_IL = B_neigs + 1;
3479:             } else {
3480:               B_IL = B_N - nmin_s + 1;
3481:               B_IU = B_N - B_neigs;
3482:             }
3483:           } else {
3484:             B_IL = B_neigs + 1;
3485:             B_IU = nmin_s;
3486:           }
3487:           if (pcbddc->dbg_flag) {
3488:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3489:           }
3490:           if (sub_schurs->is_symmetric) {
3491:             PetscInt j,k;
3492:             for (j=0;j<subset_size;j++) {
3493:               for (k=j;k<subset_size;k++) {
3494:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3495:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3496:               }
3497:             }
3498:           } else {
3499:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3500:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3501:           }
3502:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3503: #if defined(PETSC_USE_COMPLEX)
3504:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3505: #else
3506:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3507: #endif
3508:           PetscFPTrapPop();
3509:           B_neigs += B_neigs2;
3510:         }
3511:         if (B_ierr) {
3512:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3513:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3514:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3515:         }
3516:         if (pcbddc->dbg_flag) {
3517:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3518:           for (j=0;j<B_neigs;j++) {
3519:             if (eigs[j] == 0.0) {
3520:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3521:             } else {
3522:               if (pcbddc->use_deluxe_scaling) {
3523:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3524:               } else {
3525:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3526:               }
3527:             }
3528:           }
3529:         }
3530:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3531:     }
3532:     /* change the basis back to the original one */
3533:     if (sub_schurs->change) {
3534:       Mat change,phi,phit;

3536:       if (pcbddc->dbg_flag > 2) {
3537:         PetscInt ii;
3538:         for (ii=0;ii<B_neigs;ii++) {
3539:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3540:           for (j=0;j<B_N;j++) {
3541: #if defined(PETSC_USE_COMPLEX)
3542:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3543:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3544:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3545: #else
3546:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3547: #endif
3548:           }
3549:         }
3550:       }
3551:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3552:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3553:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3554:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3555:       MatDestroy(&phit);
3556:       MatDestroy(&phi);
3557:     }
3558:     maxneigs = PetscMax(B_neigs,maxneigs);
3559:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3560:     if (B_neigs) {
3561:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3563:       if (pcbddc->dbg_flag > 1) {
3564:         PetscInt ii;
3565:         for (ii=0;ii<B_neigs;ii++) {
3566:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3567:           for (j=0;j<B_N;j++) {
3568: #if defined(PETSC_USE_COMPLEX)
3569:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3570:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3571:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3572: #else
3573:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3574: #endif
3575:           }
3576:         }
3577:       }
3578:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3579:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3580:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3581:       cum++;
3582:     }
3583:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3584:     /* shift for next computation */
3585:     cumarray += subset_size*subset_size;
3586:   }
3587:   if (pcbddc->dbg_flag) {
3588:     PetscViewerFlush(pcbddc->dbg_viewer);
3589:   }

3591:   if (mss) {
3592:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3593:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3594:     /* destroy matrices (junk) */
3595:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3596:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3597:   }
3598:   if (allocated_S_St) {
3599:     PetscFree2(S,St);
3600:   }
3601:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3602: #if defined(PETSC_USE_COMPLEX)
3603:   PetscFree(rwork);
3604: #endif
3605:   if (pcbddc->dbg_flag) {
3606:     PetscInt maxneigs_r;
3607:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3608:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3609:   }
3610:   return(0);
3611: }

/*
   PCBDDCSetUpSolvers - Runs the solver-related setup stages of the BDDC preconditioner, in order:
   local scatters, local solvers, local correction / coarse basis, coarse solver.

   Input Parameter:
.  pc - the preconditioner context, forwarded unchanged to every stage

   Notes:
   coarse_submat_vals is produced by PCBDDCSetUpCorrection(), passed to PCBDDCSetUpCoarseSolver()
   and released here with PetscFree() before returning.
   As shown in this listing the routine returns 0 unconditionally; the return values of the
   called stages are not inspected here.
*/
3613: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3614: {
3615:   PetscScalar    *coarse_submat_vals;

3619:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3620:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3621:   PCBDDCSetUpLocalScatters(pc);

3623:   /* Setup local neumann solver ksp_R */
3624:   /* PCBDDCSetUpLocalScatters should be called first! */
        /* NOTE(review): the meaning of the two boolean flags is defined by PCBDDCSetUpLocalSolvers - confirm there */
3625:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3627:   /*
3628:      Setup local correction and local part of coarse basis.
3629:      Gives back the dense local part of the coarse matrix in column major ordering
3630:   */
3631:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3633:   /* Compute total number of coarse nodes and setup coarse solver */
3634:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3636:   /* free */
        /* the values array handed back by PCBDDCSetUpCorrection is owned by this routine from here on */
3637:   PetscFree(coarse_submat_vals);
3638:   return(0);
3639: }

/*
   PCBDDCResetCustomization - Releases the customization objects stored in the PC_BDDC data of pc.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed as PC_BDDC

   Notes:
   Destroys the primal-vertex index sets (user_primal_vertices and its _local variant), the
   Neumann and Dirichlet boundary index sets (plain and Local variants), the onearnullspace
   null space, and frees onearnullvecs_state.
   Finally calls PCBDDCSetDofsSplitting() and PCBDDCSetDofsSplittingLocal() with 0 and NULL;
   presumably this clears any stored dofs splitting - confirm against those routines.
   Returns 0 unconditionally as shown in this listing.
*/
3641: PetscErrorCode PCBDDCResetCustomization(PC pc)
3642: {
3643:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3647:   ISDestroy(&pcbddc->user_primal_vertices);
3648:   ISDestroy(&pcbddc->user_primal_vertices_local);
3649:   ISDestroy(&pcbddc->NeumannBoundaries);
3650:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3651:   ISDestroy(&pcbddc->DirichletBoundaries);
3652:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3653:   PetscFree(pcbddc->onearnullvecs_state);
3654:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
        /* zero count and NULL array passed to both splitting setters */
3655:   PCBDDCSetDofsSplitting(pc,0,NULL);
3656:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3657:   return(0);
3658: }

/*
   PCBDDCResetTopography - Releases the topography-related objects stored in the PC_BDDC data of pc
   and resets the associated state flags.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed as PC_BDDC

   Notes:
   Destroys nedcG, nedclocal, discretegradient, the change-of-basis matrices
   (user_ChangeOfBasisMatrix, ChangeOfBasisMatrix, switch_static_change), work_change,
   ConstraintMatrix, divudotp, divudotp_vl2l, the graph mat_graph, every entry of local_subs
   (and the array itself) and sub_schurs.
   On exit n_local_subs is 0, graphanalyzed and corner_selected are PETSC_FALSE and
   recompute_topography is PETSC_TRUE.
   Returns 0 unconditionally as shown in this listing.
*/
3660: PetscErrorCode PCBDDCResetTopography(PC pc)
3661: {
3662:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3663:   PetscInt       i;

3667:   MatDestroy(&pcbddc->nedcG);
3668:   ISDestroy(&pcbddc->nedclocal);
3669:   MatDestroy(&pcbddc->discretegradient);
3670:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3671:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3672:   MatDestroy(&pcbddc->switch_static_change);
3673:   VecDestroy(&pcbddc->work_change);
3674:   MatDestroy(&pcbddc->ConstraintMatrix);
3675:   MatDestroy(&pcbddc->divudotp);
3676:   ISDestroy(&pcbddc->divudotp_vl2l);
3677:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
        /* destroy each stored index set before zeroing the count and freeing the holding array */
3678:   for (i=0;i<pcbddc->n_local_subs;i++) {
3679:     ISDestroy(&pcbddc->local_subs[i]);
3680:   }
3681:   pcbddc->n_local_subs = 0;
3682:   PetscFree(pcbddc->local_subs);
3683:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
        /* state flags: analysis marked as not done, recomputation requested, corner selection cleared */
3684:   pcbddc->graphanalyzed        = PETSC_FALSE;
3685:   pcbddc->recompute_topography = PETSC_TRUE;
3686:   pcbddc->corner_selected      = PETSC_FALSE;
3687:   return(0);
3688: }

/*
   PCBDDCResetSolvers - Releases the solver-related objects stored in the PC_BDDC data of pc.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed as PC_BDDC

   Notes:
   Destroys the coarse basis matrices (coarse_phi_B/D, coarse_psi_B/D), the work vectors
   (coarse_vec, vec1_P, vec1_C, vec1_R, vec2_R), local_auxmat1/2, is_R_local, the scatters
   (R_to_B, R_to_D, coarse_loc_to_glob), local_mat, the primal index arrays,
   coarse_subassembling and the benign_* data.
   The three KSP objects (ksp_D, ksp_R, coarse_ksp) are passed to KSPReset(), not destroyed.
   Returns 0 unconditionally as shown in this listing.
*/
3690: PetscErrorCode PCBDDCResetSolvers(PC pc)
3691: {
3692:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3696:   VecDestroy(&pcbddc->coarse_vec);
        /* The raw storage of coarse_phi_B is fetched and freed explicitly before the matrix is destroyed.
           NOTE(review): presumably the array was allocated outside the matrix and is not owned by it;
           confirm against the code that creates coarse_phi_B. The array is not restored after the get. */
3697:   if (pcbddc->coarse_phi_B) {
3698:     PetscScalar *array;
3699:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3700:     PetscFree(array);
3701:   }
3702:   MatDestroy(&pcbddc->coarse_phi_B);
3703:   MatDestroy(&pcbddc->coarse_phi_D);
3704:   MatDestroy(&pcbddc->coarse_psi_B);
3705:   MatDestroy(&pcbddc->coarse_psi_D);
3706:   VecDestroy(&pcbddc->vec1_P);
3707:   VecDestroy(&pcbddc->vec1_C);
3708:   MatDestroy(&pcbddc->local_auxmat2);
3709:   MatDestroy(&pcbddc->local_auxmat1);
3710:   VecDestroy(&pcbddc->vec1_R);
3711:   VecDestroy(&pcbddc->vec2_R);
3712:   ISDestroy(&pcbddc->is_R_local);
3713:   VecScatterDestroy(&pcbddc->R_to_B);
3714:   VecScatterDestroy(&pcbddc->R_to_D);
3715:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
        /* solvers are reset rather than destroyed */
3716:   KSPReset(pcbddc->ksp_D);
3717:   KSPReset(pcbddc->ksp_R);
3718:   KSPReset(pcbddc->coarse_ksp);
3719:   MatDestroy(&pcbddc->local_mat);
3720:   PetscFree(pcbddc->primal_indices_local_idxs);
3721:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3722:   PetscFree(pcbddc->global_primal_indices);
3723:   ISDestroy(&pcbddc->coarse_subassembling);
3724:   MatDestroy(&pcbddc->benign_change);
3725:   VecDestroy(&pcbddc->benign_vec);
        /* NOTE(review): the PETSC_TRUE flag's meaning is defined by PCBDDCBenignShellMat - confirm there */
3726:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3727:   MatDestroy(&pcbddc->benign_B0);
3728:   PetscSFDestroy(&pcbddc->benign_sf);
        /* destroy the benign_n stored index sets, then the array that holds them */
3729:   if (pcbddc->benign_zerodiag_subs) {
3730:     PetscInt i;
3731:     for (i=0;i<pcbddc->benign_n;i++) {
3732:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3733:     }
3734:     PetscFree(pcbddc->benign_zerodiag_subs);
3735:   }
3736:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3737:   return(0);
3738: }

/*
   PCBDDCSetUpLocalWorkVectors - Creates, or re-creates when their size changed, the local work
   vectors vec1_R, vec2_R, vec1_P and vec1_C stored in the PC_BDDC data of pc.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed both as PC_BDDC and as PC_IS

   Notes:
   Sizes used: n_R = pcis->n - n_vertices for vec1_R/vec2_R, local_primal_size for vec1_P,
   n_constraints = local_primal_size - benign_n - n_vertices for vec1_C.
   The vector type and the communicator are taken from pcis->vec1_N.
   A vector is rebuilt only if its current size (or -1 when it does not exist) differs from the
   required one.
   Returns 0 unconditionally as shown in this listing.
*/
3740: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3741: {
        /* both views alias the same pc->data pointer; NOTE(review): presumably PC_BDDC starts with a PC_IS - confirm in bddc.h */
3742:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3743:   PC_IS          *pcis = (PC_IS*)pc->data;
3744:   VecType        impVecType;
3745:   PetscInt       n_constraints,n_R,old_size;

3749:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3750:   n_R = pcis->n - pcbddc->n_vertices;
3751:   VecGetType(pcis->vec1_N,&impVecType);
3752:   /* local work vectors (try to avoid unneeded work)*/
3753:   /* R nodes */
        /* old_size stays -1 when the vector does not exist yet, which forces creation below */
3754:   old_size = -1;
3755:   if (pcbddc->vec1_R) {
3756:     VecGetSize(pcbddc->vec1_R,&old_size);
3757:   }
3758:   if (n_R != old_size) {
3759:     VecDestroy(&pcbddc->vec1_R);
3760:     VecDestroy(&pcbddc->vec2_R);
3761:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3762:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3763:     VecSetType(pcbddc->vec1_R,impVecType);
        /* vec2_R is always rebuilt together with vec1_R, as a duplicate of it */
3764:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3765:   }
3766:   /* local primal dofs */
3767:   old_size = -1;
3768:   if (pcbddc->vec1_P) {
3769:     VecGetSize(pcbddc->vec1_P,&old_size);
3770:   }
3771:   if (pcbddc->local_primal_size != old_size) {
3772:     VecDestroy(&pcbddc->vec1_P);
3773:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3774:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3775:     VecSetType(pcbddc->vec1_P,impVecType);
3776:   }
3777:   /* local explicit constraints */
3778:   old_size = -1;
3779:   if (pcbddc->vec1_C) {
3780:     VecGetSize(pcbddc->vec1_C,&old_size);
3781:   }
        /* unlike the two cases above, nothing is done when n_constraints is zero:
           an already existing vec1_C is then left untouched (NOTE(review): confirm this is intended) */
3782:   if (n_constraints && n_constraints != old_size) {
3783:     VecDestroy(&pcbddc->vec1_C);
3784:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3785:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3786:     VecSetType(pcbddc->vec1_C,impVecType);
3787:   }
3788:   return(0);
3789: }

3791: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3792: {
3793:   PetscErrorCode  ierr;
3794:   /* pointers to pcis and pcbddc */
3795:   PC_IS*          pcis = (PC_IS*)pc->data;
3796:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3797:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3798:   /* submatrices of local problem */
3799:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3800:   /* submatrices of local coarse problem */
3801:   Mat             S_VV,S_CV,S_VC,S_CC;
3802:   /* working matrices */
3803:   Mat             C_CR;
3804:   /* additional working stuff */
3805:   PC              pc_R;
3806:   Mat             F,Brhs = NULL;
3807:   Vec             dummy_vec;
3808:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3809:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3810:   PetscScalar     *work;
3811:   PetscInt        *idx_V_B;
3812:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3813:   PetscInt        i,n_R,n_D,n_B;

3815:   /* some shortcuts to scalars */
3816:   PetscScalar     one=1.0,m_one=-1.0;

3819:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");

3821:   /* Set Non-overlapping dimensions */
3822:   n_vertices = pcbddc->n_vertices;
3823:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3824:   n_B = pcis->n_B;
3825:   n_D = pcis->n - n_B;
3826:   n_R = pcis->n - n_vertices;

3828:   /* vertices in boundary numbering */
3829:   PetscMalloc1(n_vertices,&idx_V_B);
3830:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3831:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);

3833:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3834:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3835:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3836:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3837:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3838:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3839:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3840:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3841:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3842:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3844:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3845:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3846:   PCSetUp(pc_R);
3847:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3848:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3849:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3850:   lda_rhs = n_R;
3851:   need_benign_correction = PETSC_FALSE;
3852:   if (isLU || isILU || isCHOL) {
3853:     PCFactorGetMatrix(pc_R,&F);
3854:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3855:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3856:     MatFactorType      type;

3858:     F = reuse_solver->F;
3859:     MatGetFactorType(F,&type);
3860:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3861:     MatGetSize(F,&lda_rhs,NULL);
3862:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3863:   } else {
3864:     F = NULL;
3865:   }

3867:   /* determine if we can use a sparse right-hand side */
3868:   sparserhs = PETSC_FALSE;
3869:   if (F) {
3870:     MatSolverType solver;

3872:     MatFactorGetSolverType(F,&solver);
3873:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3874:   }

3876:   /* allocate workspace */
3877:   n = 0;
3878:   if (n_constraints) {
3879:     n += lda_rhs*n_constraints;
3880:   }
3881:   if (n_vertices) {
3882:     n = PetscMax(2*lda_rhs*n_vertices,n);
3883:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3884:   }
3885:   if (!pcbddc->symmetric_primal) {
3886:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3887:   }
3888:   PetscMalloc1(n,&work);

3890:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3891:   dummy_vec = NULL;
3892:   if (need_benign_correction && lda_rhs != n_R && F) {
3893:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3894:   }

3896:   /* Precompute data needed for preprocessing and application of BDDC */
3897:   if (n_constraints) {
3898:     Mat         M3,C_B;
3899:     IS          is_aux;
3900:     PetscScalar *array,*array2;

3902:     MatDestroy(&pcbddc->local_auxmat1);
3903:     MatDestroy(&pcbddc->local_auxmat2);

3905:     /* Extract constraints on R nodes: C_{CR}  */
3906:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3907:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3908:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3910:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3911:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3912:     if (!sparserhs) {
3913:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3914:       for (i=0;i<n_constraints;i++) {
3915:         const PetscScalar *row_cmat_values;
3916:         const PetscInt    *row_cmat_indices;
3917:         PetscInt          size_of_constraint,j;

3919:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3920:         for (j=0;j<size_of_constraint;j++) {
3921:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3922:         }
3923:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3924:       }
3925:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3926:     } else {
3927:       Mat tC_CR;

3929:       MatScale(C_CR,-1.0);
3930:       if (lda_rhs != n_R) {
3931:         PetscScalar *aa;
3932:         PetscInt    r,*ii,*jj;
3933:         PetscBool   done;

3935:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3936:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3937:         MatSeqAIJGetArray(C_CR,&aa);
3938:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3939:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3940:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3941:       } else {
3942:         PetscObjectReference((PetscObject)C_CR);
3943:         tC_CR = C_CR;
3944:       }
3945:       MatCreateTranspose(tC_CR,&Brhs);
3946:       MatDestroy(&tC_CR);
3947:     }
3948:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3949:     if (F) {
3950:       if (need_benign_correction) {
3951:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3953:         /* rhs is already zero on interior dofs, no need to change the rhs */
3954:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3955:       }
3956:       MatMatSolve(F,Brhs,local_auxmat2_R);
3957:       if (need_benign_correction) {
3958:         PetscScalar        *marr;
3959:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3961:         MatDenseGetArray(local_auxmat2_R,&marr);
3962:         if (lda_rhs != n_R) {
3963:           for (i=0;i<n_constraints;i++) {
3964:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3965:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3966:             VecResetArray(dummy_vec);
3967:           }
3968:         } else {
3969:           for (i=0;i<n_constraints;i++) {
3970:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3971:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3972:             VecResetArray(pcbddc->vec1_R);
3973:           }
3974:         }
3975:         MatDenseRestoreArray(local_auxmat2_R,&marr);
3976:       }
3977:     } else {
3978:       PetscScalar *marr;

3980:       MatDenseGetArray(local_auxmat2_R,&marr);
3981:       for (i=0;i<n_constraints;i++) {
3982:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3983:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3984:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3985:         VecResetArray(pcbddc->vec1_R);
3986:         VecResetArray(pcbddc->vec2_R);
3987:       }
3988:       MatDenseRestoreArray(local_auxmat2_R,&marr);
3989:     }
3990:     if (sparserhs) {
3991:       MatScale(C_CR,-1.0);
3992:     }
3993:     MatDestroy(&Brhs);
3994:     if (!pcbddc->switch_static) {
3995:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3996:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
3997:       MatDenseGetArray(local_auxmat2_R,&array2);
3998:       for (i=0;i<n_constraints;i++) {
3999:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4000:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
4001:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4002:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4003:         VecResetArray(pcis->vec1_B);
4004:         VecResetArray(pcbddc->vec1_R);
4005:       }
4006:       MatDenseRestoreArray(local_auxmat2_R,&array2);
4007:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4008:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4009:     } else {
4010:       if (lda_rhs != n_R) {
4011:         IS dummy;

4013:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4014:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4015:         ISDestroy(&dummy);
4016:       } else {
4017:         PetscObjectReference((PetscObject)local_auxmat2_R);
4018:         pcbddc->local_auxmat2 = local_auxmat2_R;
4019:       }
4020:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4021:     }
4022:     ISDestroy(&is_aux);
4023:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
4024:     MatScale(M3,m_one);
4025:     if (isCHOL) {
4026:       MatCholeskyFactor(M3,NULL,NULL);
4027:     } else {
4028:       MatLUFactor(M3,NULL,NULL,NULL);
4029:     }
4030:     MatSeqDenseInvertFactors_Private(M3);
4031:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4032:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4033:     MatDestroy(&C_B);
4034:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4035:     MatDestroy(&M3);
4036:   }

4038:   /* Get submatrices from subdomain matrix */
4039:   if (n_vertices) {
4040:     IS        is_aux;
4041:     PetscBool isseqaij;

4043:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4044:       IS tis;

4046:       ISDuplicate(pcbddc->is_R_local,&tis);
4047:       ISSort(tis);
4048:       ISComplement(tis,0,pcis->n,&is_aux);
4049:       ISDestroy(&tis);
4050:     } else {
4051:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4052:     }
4053:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4054:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4055:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4056:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4057:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4058:     }
4059:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4060:     ISDestroy(&is_aux);
4061:   }

4063:   /* Matrix of coarse basis functions (local) */
4064:   if (pcbddc->coarse_phi_B) {
4065:     PetscInt on_B,on_primal,on_D=n_D;
4066:     if (pcbddc->coarse_phi_D) {
4067:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4068:     }
4069:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4070:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4071:       PetscScalar *marray;

4073:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4074:       PetscFree(marray);
4075:       MatDestroy(&pcbddc->coarse_phi_B);
4076:       MatDestroy(&pcbddc->coarse_psi_B);
4077:       MatDestroy(&pcbddc->coarse_phi_D);
4078:       MatDestroy(&pcbddc->coarse_psi_D);
4079:     }
4080:   }

4082:   if (!pcbddc->coarse_phi_B) {
4083:     PetscScalar *marr;

4085:     /* memory size */
4086:     n = n_B*pcbddc->local_primal_size;
4087:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4088:     if (!pcbddc->symmetric_primal) n *= 2;
4089:     PetscCalloc1(n,&marr);
4090:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4091:     marr += n_B*pcbddc->local_primal_size;
4092:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4093:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4094:       marr += n_D*pcbddc->local_primal_size;
4095:     }
4096:     if (!pcbddc->symmetric_primal) {
4097:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4098:       marr += n_B*pcbddc->local_primal_size;
4099:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4100:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4101:       }
4102:     } else {
4103:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4104:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4105:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4106:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4107:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4108:       }
4109:     }
4110:   }

4112:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4113:   p0_lidx_I = NULL;
4114:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4115:     const PetscInt *idxs;

4117:     ISGetIndices(pcis->is_I_local,&idxs);
4118:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4119:     for (i=0;i<pcbddc->benign_n;i++) {
4120:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4121:     }
4122:     ISRestoreIndices(pcis->is_I_local,&idxs);
4123:   }

4125:   /* vertices */
4126:   if (n_vertices) {
4127:     PetscBool restoreavr = PETSC_FALSE;

4129:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4131:     if (n_R) {
4132:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4133:       PetscBLASInt B_N,B_one = 1;
4134:       PetscScalar  *x,*y;

4136:       MatScale(A_RV,m_one);
4137:       if (need_benign_correction) {
4138:         ISLocalToGlobalMapping RtoN;
4139:         IS                     is_p0;
4140:         PetscInt               *idxs_p0,n;

4142:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4143:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4144:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4145:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
4146:         ISLocalToGlobalMappingDestroy(&RtoN);
4147:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4148:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4149:         ISDestroy(&is_p0);
4150:       }

4152:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4153:       if (!sparserhs || need_benign_correction) {
4154:         if (lda_rhs == n_R) {
4155:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4156:         } else {
4157:           PetscScalar    *av,*array;
4158:           const PetscInt *xadj,*adjncy;
4159:           PetscInt       n;
4160:           PetscBool      flg_row;

4162:           array = work+lda_rhs*n_vertices;
4163:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4164:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4165:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4166:           MatSeqAIJGetArray(A_RV,&av);
4167:           for (i=0;i<n;i++) {
4168:             PetscInt j;
4169:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4170:           }
4171:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4172:           MatDestroy(&A_RV);
4173:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4174:         }
4175:         if (need_benign_correction) {
4176:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4177:           PetscScalar        *marr;

4179:           MatDenseGetArray(A_RV,&marr);
4180:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4182:                  | 0 0  0 | (V)
4183:              L = | 0 0 -1 | (P-p0)
4184:                  | 0 0 -1 | (p0)

4186:           */
4187:           for (i=0;i<reuse_solver->benign_n;i++) {
4188:             const PetscScalar *vals;
4189:             const PetscInt    *idxs,*idxs_zero;
4190:             PetscInt          n,j,nz;

4192:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4193:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4194:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4195:             for (j=0;j<n;j++) {
4196:               PetscScalar val = vals[j];
4197:               PetscInt    k,col = idxs[j];
4198:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4199:             }
4200:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4201:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4202:           }
4203:           MatDenseRestoreArray(A_RV,&marr);
4204:         }
4205:         PetscObjectReference((PetscObject)A_RV);
4206:         Brhs = A_RV;
4207:       } else {
4208:         Mat tA_RVT,A_RVT;

4210:         if (!pcbddc->symmetric_primal) {
4211:           /* A_RV already scaled by -1 */
4212:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4213:         } else {
4214:           restoreavr = PETSC_TRUE;
4215:           MatScale(A_VR,-1.0);
4216:           PetscObjectReference((PetscObject)A_VR);
4217:           A_RVT = A_VR;
4218:         }
4219:         if (lda_rhs != n_R) {
4220:           PetscScalar *aa;
4221:           PetscInt    r,*ii,*jj;
4222:           PetscBool   done;

4224:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4225:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4226:           MatSeqAIJGetArray(A_RVT,&aa);
4227:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4228:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4229:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4230:         } else {
4231:           PetscObjectReference((PetscObject)A_RVT);
4232:           tA_RVT = A_RVT;
4233:         }
4234:         MatCreateTranspose(tA_RVT,&Brhs);
4235:         MatDestroy(&tA_RVT);
4236:         MatDestroy(&A_RVT);
4237:       }
4238:       if (F) {
4239:         /* need to correct the rhs */
4240:         if (need_benign_correction) {
4241:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4242:           PetscScalar        *marr;

4244:           MatDenseGetArray(Brhs,&marr);
4245:           if (lda_rhs != n_R) {
4246:             for (i=0;i<n_vertices;i++) {
4247:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4248:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4249:               VecResetArray(dummy_vec);
4250:             }
4251:           } else {
4252:             for (i=0;i<n_vertices;i++) {
4253:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4254:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4255:               VecResetArray(pcbddc->vec1_R);
4256:             }
4257:           }
4258:           MatDenseRestoreArray(Brhs,&marr);
4259:         }
4260:         MatMatSolve(F,Brhs,A_RRmA_RV);
4261:         if (restoreavr) {
4262:           MatScale(A_VR,-1.0);
4263:         }
4264:         /* need to correct the solution */
4265:         if (need_benign_correction) {
4266:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4267:           PetscScalar        *marr;

4269:           MatDenseGetArray(A_RRmA_RV,&marr);
4270:           if (lda_rhs != n_R) {
4271:             for (i=0;i<n_vertices;i++) {
4272:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4273:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4274:               VecResetArray(dummy_vec);
4275:             }
4276:           } else {
4277:             for (i=0;i<n_vertices;i++) {
4278:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4279:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4280:               VecResetArray(pcbddc->vec1_R);
4281:             }
4282:           }
4283:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4284:         }
4285:       } else {
4286:         MatDenseGetArray(Brhs,&y);
4287:         for (i=0;i<n_vertices;i++) {
4288:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4289:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4290:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4291:           VecResetArray(pcbddc->vec1_R);
4292:           VecResetArray(pcbddc->vec2_R);
4293:         }
4294:         MatDenseRestoreArray(Brhs,&y);
4295:       }
4296:       MatDestroy(&A_RV);
4297:       MatDestroy(&Brhs);
4298:       /* S_VV and S_CV */
4299:       if (n_constraints) {
4300:         Mat B;

4302:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4303:         for (i=0;i<n_vertices;i++) {
4304:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4305:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4306:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4307:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4308:           VecResetArray(pcis->vec1_B);
4309:           VecResetArray(pcbddc->vec1_R);
4310:         }
4311:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4312:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4313:         MatDestroy(&B);
4314:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4315:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4316:         MatScale(S_CV,m_one);
4317:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4318:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4319:         MatDestroy(&B);
4320:       }
4321:       if (lda_rhs != n_R) {
4322:         MatDestroy(&A_RRmA_RV);
4323:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4324:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4325:       }
4326:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4327:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4328:       if (need_benign_correction) {
4329:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4330:         PetscScalar      *marr,*sums;

4332:         PetscMalloc1(n_vertices,&sums);
4333:         MatDenseGetArray(S_VVt,&marr);
4334:         for (i=0;i<reuse_solver->benign_n;i++) {
4335:           const PetscScalar *vals;
4336:           const PetscInt    *idxs,*idxs_zero;
4337:           PetscInt          n,j,nz;

4339:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4340:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4341:           for (j=0;j<n_vertices;j++) {
4342:             PetscInt k;
4343:             sums[j] = 0.;
4344:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4345:           }
4346:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4347:           for (j=0;j<n;j++) {
4348:             PetscScalar val = vals[j];
4349:             PetscInt k;
4350:             for (k=0;k<n_vertices;k++) {
4351:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4352:             }
4353:           }
4354:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4355:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4356:         }
4357:         PetscFree(sums);
4358:         MatDenseRestoreArray(S_VVt,&marr);
4359:         MatDestroy(&A_RV_bcorr);
4360:       }
4361:       MatDestroy(&A_RRmA_RV);
4362:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4363:       MatDenseGetArray(A_VV,&x);
4364:       MatDenseGetArray(S_VVt,&y);
4365:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4366:       MatDenseRestoreArray(A_VV,&x);
4367:       MatDenseRestoreArray(S_VVt,&y);
4368:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4369:       MatDestroy(&S_VVt);
4370:     } else {
4371:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4372:     }
4373:     MatDestroy(&A_VV);

4375:     /* coarse basis functions */
4376:     for (i=0;i<n_vertices;i++) {
4377:       PetscScalar *y;

4379:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4380:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4381:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4382:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4383:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4384:       y[n_B*i+idx_V_B[i]] = 1.0;
4385:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4386:       VecResetArray(pcis->vec1_B);

4388:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4389:         PetscInt j;

4391:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4392:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4393:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4394:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4395:         VecResetArray(pcis->vec1_D);
4396:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4397:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4398:       }
4399:       VecResetArray(pcbddc->vec1_R);
4400:     }
4401:     /* if n_R == 0 the object is not destroyed */
4402:     MatDestroy(&A_RV);
4403:   }
4404:   VecDestroy(&dummy_vec);

4406:   if (n_constraints) {
4407:     Mat B;

4409:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4410:     MatScale(S_CC,m_one);
4411:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4412:     MatScale(S_CC,m_one);
4413:     if (n_vertices) {
4414:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4415:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4416:       } else {
4417:         Mat S_VCt;

4419:         if (lda_rhs != n_R) {
4420:           MatDestroy(&B);
4421:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4422:           MatSeqDenseSetLDA(B,lda_rhs);
4423:         }
4424:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4425:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4426:         MatDestroy(&S_VCt);
4427:       }
4428:     }
4429:     MatDestroy(&B);
4430:     /* coarse basis functions */
4431:     for (i=0;i<n_constraints;i++) {
4432:       PetscScalar *y;

4434:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4435:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4436:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4437:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4438:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4439:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4440:       VecResetArray(pcis->vec1_B);
4441:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4442:         PetscInt j;

4444:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4445:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4446:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4447:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4448:         VecResetArray(pcis->vec1_D);
4449:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4450:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4451:       }
4452:       VecResetArray(pcbddc->vec1_R);
4453:     }
4454:   }
4455:   if (n_constraints) {
4456:     MatDestroy(&local_auxmat2_R);
4457:   }
4458:   PetscFree(p0_lidx_I);

4460:   /* coarse matrix entries relative to B_0 */
4461:   if (pcbddc->benign_n) {
4462:     Mat         B0_B,B0_BPHI;
4463:     IS          is_dummy;
4464:     PetscScalar *data;
4465:     PetscInt    j;

4467:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4468:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4469:     ISDestroy(&is_dummy);
4470:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4471:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4472:     MatDenseGetArray(B0_BPHI,&data);
4473:     for (j=0;j<pcbddc->benign_n;j++) {
4474:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4475:       for (i=0;i<pcbddc->local_primal_size;i++) {
4476:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4477:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4478:       }
4479:     }
4480:     MatDenseRestoreArray(B0_BPHI,&data);
4481:     MatDestroy(&B0_B);
4482:     MatDestroy(&B0_BPHI);
4483:   }

4485:   /* compute other basis functions for non-symmetric problems */
4486:   if (!pcbddc->symmetric_primal) {
4487:     Mat         B_V=NULL,B_C=NULL;
4488:     PetscScalar *marray;

4490:     if (n_constraints) {
4491:       Mat S_CCT,C_CRT;

4493:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4494:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4495:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4496:       MatDestroy(&S_CCT);
4497:       if (n_vertices) {
4498:         Mat S_VCT;

4500:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4501:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4502:         MatDestroy(&S_VCT);
4503:       }
4504:       MatDestroy(&C_CRT);
4505:     } else {
4506:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4507:     }
4508:     if (n_vertices && n_R) {
4509:       PetscScalar    *av,*marray;
4510:       const PetscInt *xadj,*adjncy;
4511:       PetscInt       n;
4512:       PetscBool      flg_row;

4514:       /* B_V = B_V - A_VR^T */
4515:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4516:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4517:       MatSeqAIJGetArray(A_VR,&av);
4518:       MatDenseGetArray(B_V,&marray);
4519:       for (i=0;i<n;i++) {
4520:         PetscInt j;
4521:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4522:       }
4523:       MatDenseRestoreArray(B_V,&marray);
4524:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4525:       MatDestroy(&A_VR);
4526:     }

4528:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4529:     if (n_vertices) {
4530:       MatDenseGetArray(B_V,&marray);
4531:       for (i=0;i<n_vertices;i++) {
4532:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4533:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4534:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4535:         VecResetArray(pcbddc->vec1_R);
4536:         VecResetArray(pcbddc->vec2_R);
4537:       }
4538:       MatDenseRestoreArray(B_V,&marray);
4539:     }
4540:     if (B_C) {
4541:       MatDenseGetArray(B_C,&marray);
4542:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4543:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4544:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4545:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4546:         VecResetArray(pcbddc->vec1_R);
4547:         VecResetArray(pcbddc->vec2_R);
4548:       }
4549:       MatDenseRestoreArray(B_C,&marray);
4550:     }
4551:     /* coarse basis functions */
4552:     for (i=0;i<pcbddc->local_primal_size;i++) {
4553:       PetscScalar *y;

4555:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4556:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4557:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4558:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4559:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4560:       if (i<n_vertices) {
4561:         y[n_B*i+idx_V_B[i]] = 1.0;
4562:       }
4563:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4564:       VecResetArray(pcis->vec1_B);

4566:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4567:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4568:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4569:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4570:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4571:         VecResetArray(pcis->vec1_D);
4572:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4573:       }
4574:       VecResetArray(pcbddc->vec1_R);
4575:     }
4576:     MatDestroy(&B_V);
4577:     MatDestroy(&B_C);
4578:   }

4580:   /* free memory */
4581:   PetscFree(idx_V_B);
4582:   MatDestroy(&S_VV);
4583:   MatDestroy(&S_CV);
4584:   MatDestroy(&S_VC);
4585:   MatDestroy(&S_CC);
4586:   PetscFree(work);
4587:   if (n_vertices) {
4588:     MatDestroy(&A_VR);
4589:   }
4590:   if (n_constraints) {
4591:     MatDestroy(&C_CR);
4592:   }
4593:   /* Checking coarse_sub_mat and coarse basis functions */
4594:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4595:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4596:   if (pcbddc->dbg_flag) {
4597:     Mat         coarse_sub_mat;
4598:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4599:     Mat         coarse_phi_D,coarse_phi_B;
4600:     Mat         coarse_psi_D,coarse_psi_B;
4601:     Mat         A_II,A_BB,A_IB,A_BI;
4602:     Mat         C_B,CPHI;
4603:     IS          is_dummy;
4604:     Vec         mones;
4605:     MatType     checkmattype=MATSEQAIJ;
4606:     PetscReal   real_value;

4608:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4609:       Mat A;
4610:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4611:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4612:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4613:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4614:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4615:       MatDestroy(&A);
4616:     } else {
4617:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4618:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4619:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4620:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4621:     }
4622:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4623:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4624:     if (!pcbddc->symmetric_primal) {
4625:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4626:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4627:     }
4628:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4630:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4631:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4632:     PetscViewerFlush(pcbddc->dbg_viewer);
4633:     if (!pcbddc->symmetric_primal) {
4634:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4635:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4636:       MatDestroy(&AUXMAT);
4637:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4638:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4639:       MatDestroy(&AUXMAT);
4640:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4641:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4642:       MatDestroy(&AUXMAT);
4643:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4644:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4645:       MatDestroy(&AUXMAT);
4646:     } else {
4647:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4648:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4649:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4650:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4651:       MatDestroy(&AUXMAT);
4652:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4653:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4654:       MatDestroy(&AUXMAT);
4655:     }
4656:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4657:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4658:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4659:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4660:     if (pcbddc->benign_n) {
4661:       Mat         B0_B,B0_BPHI;
4662:       PetscScalar *data,*data2;
4663:       PetscInt    j;

4665:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4666:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4667:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4668:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4669:       MatDenseGetArray(TM1,&data);
4670:       MatDenseGetArray(B0_BPHI,&data2);
4671:       for (j=0;j<pcbddc->benign_n;j++) {
4672:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4673:         for (i=0;i<pcbddc->local_primal_size;i++) {
4674:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4675:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4676:         }
4677:       }
4678:       MatDenseRestoreArray(TM1,&data);
4679:       MatDenseRestoreArray(B0_BPHI,&data2);
4680:       MatDestroy(&B0_B);
4681:       ISDestroy(&is_dummy);
4682:       MatDestroy(&B0_BPHI);
4683:     }
4684: #if 0
4685:   {
4686:     PetscViewer viewer;
4687:     char filename[256];
4688:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4689:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4690:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4691:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4692:     MatView(coarse_sub_mat,viewer);
4693:     PetscObjectSetName((PetscObject)TM1,"projected");
4694:     MatView(TM1,viewer);
4695:     if (pcbddc->coarse_phi_B) {
4696:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4697:       MatView(pcbddc->coarse_phi_B,viewer);
4698:     }
4699:     if (pcbddc->coarse_phi_D) {
4700:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4701:       MatView(pcbddc->coarse_phi_D,viewer);
4702:     }
4703:     if (pcbddc->coarse_psi_B) {
4704:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4705:       MatView(pcbddc->coarse_psi_B,viewer);
4706:     }
4707:     if (pcbddc->coarse_psi_D) {
4708:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4709:       MatView(pcbddc->coarse_psi_D,viewer);
4710:     }
4711:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4712:     MatView(pcbddc->local_mat,viewer);
4713:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4714:     MatView(pcbddc->ConstraintMatrix,viewer);
4715:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4716:     ISView(pcis->is_I_local,viewer);
4717:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4718:     ISView(pcis->is_B_local,viewer);
4719:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4720:     ISView(pcbddc->is_R_local,viewer);
4721:     PetscViewerDestroy(&viewer);
4722:   }
4723: #endif
4724:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4725:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4726:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4727:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4729:     /* check constraints */
4730:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4731:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4732:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4733:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4734:     } else {
4735:       PetscScalar *data;
4736:       Mat         tmat;
4737:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4738:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4739:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4740:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4741:       MatDestroy(&tmat);
4742:     }
4743:     MatCreateVecs(CPHI,&mones,NULL);
4744:     VecSet(mones,-1.0);
4745:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4746:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4747:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4748:     if (!pcbddc->symmetric_primal) {
4749:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4750:       VecSet(mones,-1.0);
4751:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4752:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4753:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4754:     }
4755:     MatDestroy(&C_B);
4756:     MatDestroy(&CPHI);
4757:     ISDestroy(&is_dummy);
4758:     VecDestroy(&mones);
4759:     PetscViewerFlush(pcbddc->dbg_viewer);
4760:     MatDestroy(&A_II);
4761:     MatDestroy(&A_BB);
4762:     MatDestroy(&A_IB);
4763:     MatDestroy(&A_BI);
4764:     MatDestroy(&TM1);
4765:     MatDestroy(&TM2);
4766:     MatDestroy(&TM3);
4767:     MatDestroy(&TM4);
4768:     MatDestroy(&coarse_phi_D);
4769:     MatDestroy(&coarse_phi_B);
4770:     if (!pcbddc->symmetric_primal) {
4771:       MatDestroy(&coarse_psi_D);
4772:       MatDestroy(&coarse_psi_B);
4773:     }
4774:     MatDestroy(&coarse_sub_mat);
4775:   }
4776:   /* get back data */
4777:   *coarse_submat_vals_n = coarse_submat_vals;
4778:   return(0);
4779: }

/*
   MatCreateSubMatrixUnsorted - Extracts the submatrix A(isrow,iscol) when the
   index sets are not necessarily sorted.

   Input Parameters:
+  A     - the matrix to extract from
.  isrow - row indices (may be unsorted)
-  iscol - column indices (may be unsorted; may be the same object as isrow)

   Output Parameter:
.  B - the extracted matrix; the caller receives its own reference to it

   Notes:
   Unsorted index sets are replaced by sorted copies before calling
   MatCreateSubMatrices(). If at least one of them was unsorted, the extracted
   matrix is then passed through MatPermute() using the inverse of the sorting
   permutations, presumably so that the rows/columns of B follow the ordering
   of the original isrow/iscol (confirm against the MatPermute() manual page).
*/
4781: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4782: {
4783:   Mat            *work_mat;
4784:   IS             isrow_s,iscol_s;
4785:   PetscBool      rsorted,csorted;
4786:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4790:   ISSorted(isrow,&rsorted);
4791:   ISSorted(iscol,&csorted);
4792:   ISGetLocalSize(isrow,&rsize);
4793:   ISGetLocalSize(iscol,&csize);

        /* rows: build a sorted copy of isrow and remember the sorting permutation */
4795:   if (!rsorted) {
4796:     const PetscInt *idxs;
4797:     PetscInt *idxs_sorted,i;

4799:     PetscMalloc1(rsize,&idxs_perm_r);
4800:     PetscMalloc1(rsize,&idxs_sorted);
4801:     for (i=0;i<rsize;i++) {
4802:       idxs_perm_r[i] = i;
4803:     }
4804:     ISGetIndices(isrow,&idxs);
4805:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
          /* idxs itself is left untouched; the sorted values are gathered through the permutation */
4806:     for (i=0;i<rsize;i++) {
4807:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4808:     }
4809:     ISRestoreIndices(isrow,&idxs);
          /* PETSC_OWN_POINTER: isrow_s takes ownership of idxs_sorted */
4810:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4811:   } else {
          /* already sorted: share the caller's IS (extra reference is dropped at the end) */
4812:     PetscObjectReference((PetscObject)isrow);
4813:     isrow_s = isrow;
4814:   }

        /* columns: same treatment, reusing the row result when isrow and iscol are the same object */
4816:   if (!csorted) {
4817:     if (isrow == iscol) {
4818:       PetscObjectReference((PetscObject)isrow_s);
4819:       iscol_s = isrow_s;
4820:     } else {
4821:       const PetscInt *idxs;
4822:       PetscInt       *idxs_sorted,i;

4824:       PetscMalloc1(csize,&idxs_perm_c);
4825:       PetscMalloc1(csize,&idxs_sorted);
4826:       for (i=0;i<csize;i++) {
4827:         idxs_perm_c[i] = i;
4828:       }
4829:       ISGetIndices(iscol,&idxs);
4830:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4831:       for (i=0;i<csize;i++) {
4832:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4833:       }
4834:       ISRestoreIndices(iscol,&idxs);
4835:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4836:     }
4837:   } else {
4838:     PetscObjectReference((PetscObject)iscol);
4839:     iscol_s = iscol;
4840:   }

        /* extraction is always done with sorted index sets */
4842:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

        /* undo the sorting on the extracted matrix if any of the inputs was unsorted */
4844:   if (!rsorted || !csorted) {
4845:     Mat      new_mat;
4846:     IS       is_perm_r,is_perm_c;

4848:     if (!rsorted) {
4849:       PetscInt *idxs_r,i;
            /* idxs_r is the inverse of the sorting permutation idxs_perm_r */
4850:       PetscMalloc1(rsize,&idxs_r);
4851:       for (i=0;i<rsize;i++) {
4852:         idxs_r[idxs_perm_r[i]] = i;
4853:       }
4854:       PetscFree(idxs_perm_r);
4855:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4856:     } else {
            /* rows were sorted: identity permutation */
4857:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4858:     }
4859:     ISSetPermutation(is_perm_r);

4861:     if (!csorted) {
4862:       if (isrow_s == iscol_s) {
              /* same sorted IS for rows and columns: the row permutation is shared */
4863:         PetscObjectReference((PetscObject)is_perm_r);
4864:         is_perm_c = is_perm_r;
4865:       } else {
4866:         PetscInt *idxs_c,i;
4867:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4868:         PetscMalloc1(csize,&idxs_c);
4869:         for (i=0;i<csize;i++) {
4870:           idxs_c[idxs_perm_c[i]] = i;
4871:         }
4872:         PetscFree(idxs_perm_c);
4873:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4874:       }
4875:     } else {
4876:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4877:     }
4878:     ISSetPermutation(is_perm_c);

          /* replace the extracted matrix with its permuted version */
4880:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4881:     MatDestroy(&work_mat[0]);
4882:     work_mat[0] = new_mat;
4883:     ISDestroy(&is_perm_r);
4884:     ISDestroy(&is_perm_c);
4885:   }

        /* take a reference for the caller before the array of matrices is destroyed */
4887:   PetscObjectReference((PetscObject)work_mat[0]);
4888:   *B = work_mat[0];
4889:   MatDestroyMatrices(1,&work_mat);
4890:   ISDestroy(&isrow_s);
4891:   ISDestroy(&iscol_s);
4892:   return(0);
4893: }

/*
   PCBDDCComputeLocalMatrix - Computes pcbddc->local_mat by applying the local
   part of a change of basis to the local matrix of the MATIS preconditioning
   matrix.

   Input Parameters:
+  pc                  - the preconditioner; pc->pmat->data is accessed as Mat_IS
-  ChangeOfBasisMatrix - the change of basis matrix in global numbering

   Notes:
   The local change of basis (new_mat) is extracted from ChangeOfBasisMatrix
   with MatCreateSubMatrixUnsorted(), using the global indices obtained by
   mapping 0..local_size-1 through the row local-to-global mapping of pc->pmat.
   pcbddc->local_mat is then computed with MatPtAP(matis->A,new_mat,...)
   (P^T A P according to the PETSc documentation); the local matrix is first
   converted to MATSEQAIJ when it is not already of that type.

   If a matrix is composed with pc under the key "__KSPFETIDP_lA", it is
   transformed in the same way and re-composed under the same key.

   When pcbddc->dbg_flag is set, the local change is checked against the global
   one on a random vector and an error is raised if they differ by more than
   PETSC_SMALL.
*/
4895: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4896: {
4897:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4898:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4899:   Mat            new_mat,lA;
4900:   IS             is_local,is_global;
4901:   PetscInt       local_size;
4902:   PetscBool      isseqaij;

4906:   MatDestroy(&pcbddc->local_mat);
        /* global indices of all the local dofs, in local ordering (not necessarily sorted) */
4907:   MatGetSize(matis->A,&local_size,NULL);
4908:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4909:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4910:   ISDestroy(&is_local);
4911:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4912:   ISDestroy(&is_global);

4914:   /* check */
4915:   if (pcbddc->dbg_flag) {
4916:     Vec       x,x_change;
4917:     PetscReal error;

          /* x_change: global change applied to a random x; matis->y: local change applied to the scattered x */
4919:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4920:     VecSetRandom(x,NULL);
4921:     MatMult(ChangeOfBasisMatrix,x,x_change);
4922:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4923:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4924:     MatMult(new_mat,matis->x,matis->y);
4925:     if (!pcbddc->change_interior) {
            /* NOTE: these array pointers shadow the Vec x declared above, within this scope only */
4926:       const PetscScalar *x,*y,*v;
4927:       PetscReal         lerror = 0.;
4928:       PetscInt          i;

4930:       VecGetArrayRead(matis->x,&x);
4931:       VecGetArrayRead(matis->y,&y);
4932:       VecGetArrayRead(matis->counter,&v);
            /* largest modification among the entries whose counter is below 1.5
               (presumably the dofs belonging to one subdomain only - TODO confirm) */
4933:       for (i=0;i<local_size;i++)
4934:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4935:           lerror = PetscAbsScalar(x[i]-y[i]);
4936:       VecRestoreArrayRead(matis->x,&x);
4937:       VecRestoreArrayRead(matis->y,&y);
4938:       VecRestoreArrayRead(matis->counter,&v);
4939:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4940:       if (error > PETSC_SMALL) {
              /* PETSC_ERR_USER only when the change of basis is user provided and we are on level 0 */
4941:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4942:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4943:         } else {
4944:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4945:         }
4946:       }
4947:     }
          /* bring the local result back to global numbering and compare it with the global change */
4948:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4949:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4950:     VecAXPY(x,-1.0,x_change);
4951:     VecNorm(x,NORM_INFINITY,&error);
4952:     if (error > PETSC_SMALL) {
4953:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4954:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4955:       } else {
4956:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4957:       }
4958:     }
4959:     VecDestroy(&x);
4960:     VecDestroy(&x_change);
4961:   }

4963:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4964:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

4966:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4967:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4968:   if (isseqaij) {
4969:     MatDestroy(&pcbddc->local_mat);
4970:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4971:     if (lA) {
4972:       Mat work;
4973:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
            /* drop our reference to work after composing it with pc */
4974:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4975:       MatDestroy(&work);
4976:     }
4977:   } else {
4978:     Mat work_mat;

          /* work on a MATSEQAIJ copy of the local matrix */
4980:     MatDestroy(&pcbddc->local_mat);
4981:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4982:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4983:     MatDestroy(&work_mat);
4984:     if (lA) {
4985:       Mat work;
4986:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4987:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
            /* the MATSEQAIJ copy of lA is a new matrix owned by this function: release it (it was leaked before) */
            MatDestroy(&work_mat);
4988:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4989:       MatDestroy(&work);
4990:     }
4991:   }
        /* propagate the symmetry flag of the original local matrix, if it has been set */
4992:   if (matis->A->symmetric_set) {
4993:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4994: #if !defined(PETSC_USE_COMPLEX)
4995:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4996: #endif
4997:   }
4998:   MatDestroy(&new_mat);
4999:   return(0);
5000: }

/*
   PCBDDCSetUpLocalScatters - (Re)creates pcbddc->is_R_local and the scatters
   pcbddc->R_to_B and, if pcbddc->switch_static or pcbddc->dbg_flag is set,
   pcbddc->R_to_D.

   Input Parameter:
.  pc - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC

   Notes:
   The function returns immediately when the local primal space is unchanged,
   the local primal size is nonzero and debugging is off.

   The R indices are either computed here as the complement of
   pcbddc->local_primal_ref_node[0..n_vertices-1] in 0..pcis->n-1, or taken from
   sub_schurs->reuse_solver->is_R when a reuse solver is present. In the latter
   case reuse_solver->is_R is replaced by a reference to pcbddc->is_R_local.
*/
5002: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5003: {
5004:   PC_IS*          pcis = (PC_IS*)(pc->data);
5005:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5006:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5007:   PetscInt        *idx_R_local=NULL;
5008:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5009:   PetscInt        vbs,bs;
5010:   PetscBT         bitmask=NULL;
5011:   PetscErrorCode  ierr;

5014:   /*
5015:     No need to setup local scatters if
5016:       - primal space is unchanged
5017:         AND
5018:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5019:         AND
5020:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
5021:   */
5022:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5023:     return(0);
5024:   }
5025:   /* destroy old objects */
5026:   ISDestroy(&pcbddc->is_R_local);
5027:   VecScatterDestroy(&pcbddc->R_to_B);
5028:   VecScatterDestroy(&pcbddc->R_to_D);
5029:   /* Set Non-overlapping dimensions */
5030:   n_B = pcis->n_B;
5031:   n_D = pcis->n - n_B;
5032:   n_vertices = pcbddc->n_vertices;

5034:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

5036:   /* create auxiliary bitmask and allocate workspace */
5037:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5038:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5039:     PetscBTCreate(pcis->n,&bitmask);
          /* mark the vertices; the unmarked local dofs are the R dofs */
5040:     for (i=0;i<n_vertices;i++) {
5041:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5042:     }

5044:     for (i=0, n_R=0; i<pcis->n; i++) {
5045:       if (!PetscBTLookup(bitmask,i)) {
5046:         idx_R_local[n_R++] = i;
5047:       }
5048:     }
5049:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5050:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* NOTE: constness is cast away here; the array may be overwritten by the block compression below */
5052:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5053:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5054:   }

5056:   /* Block code */
        /* vbs stays 1 unless the R set turns out to be made of full blocks of size bs */
5057:   vbs = 1;
5058:   MatGetBlockSize(pcbddc->local_mat,&bs);
5059:   if (bs>1 && !(n_vertices%bs)) {
5060:     PetscBool is_blocked = PETSC_TRUE;
5061:     PetscInt  *vary;
5062:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5063:       PetscMalloc1(pcis->n/bs,&vary);
5064:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5065:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5066:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
            /* vary[k] counts the vertices in block k: each block must contain either none or all of its bs dofs */
5067:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5068:       for (i=0; i<pcis->n/bs; i++) {
5069:         if (vary[i]!=0 && vary[i]!=bs) {
5070:           is_blocked = PETSC_FALSE;
5071:           break;
5072:         }
5073:       }
5074:       PetscFree(vary);
5075:     } else {
5076:       /* Verify directly the R set */
            /* each group of bs consecutive entries must hold consecutive indices;
               the break only leaves the inner loop, the outer scan continues */
5077:       for (i=0; i<n_R/bs; i++) {
5078:         PetscInt j,node=idx_R_local[bs*i];
5079:         for (j=1; j<bs; j++) {
5080:           if (node != idx_R_local[bs*i+j]-j) {
5081:             is_blocked = PETSC_FALSE;
5082:             break;
5083:           }
5084:         }
5085:       }
5086:     }
5087:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5088:       vbs = bs;
            /* in-place compression: entry i becomes the block index of the i-th block */
5089:       for (i=0;i<n_R/vbs;i++) {
5090:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5091:       }
5092:     }
5093:   }
        /* PETSC_COPY_VALUES: the IS keeps its own copy, so idx_R_local can be released below */
5094:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5095:   if (sub_schurs && sub_schurs->reuse_solver) {
5096:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* replace the reuse solver's R index set with a reference to the one just created */
5098:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5099:     ISDestroy(&reuse_solver->is_R);
5100:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5101:     reuse_solver->is_R = pcbddc->is_R_local;
5102:   } else {
5103:     PetscFree(idx_R_local);
5104:   }

5106:   /* print some info if requested */
5107:   if (pcbddc->dbg_flag) {
5108:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5109:     PetscViewerFlush(pcbddc->dbg_viewer);
5110:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5111:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5112:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
5113:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5114:     PetscViewerFlush(pcbddc->dbg_viewer);
5115:   }

5117:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5118:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5119:     IS       is_aux1,is_aux2;
          /* NOTE: this idx_R_local shadows the function-level one (already freed at this point) */
5120:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5122:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5123:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5124:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
          /* additionally mark the dofs in is_I_local: the bitmask now flags vertices and interior dofs */
5125:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5126:     for (i=0; i<n_D; i++) {
5127:       PetscBTSet(bitmask,is_indices[i]);
5128:     }
5129:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
          /* aux_array1: positions within R of the unmarked dofs */
5130:     for (i=0, j=0; i<n_R; i++) {
5131:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5132:         aux_array1[j++] = i;
5133:       }
5134:     }
5135:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
          /* aux_array2: positions within is_B_local of the unmarked dofs */
5136:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5137:     for (i=0, j=0; i<n_B; i++) {
5138:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5139:         aux_array2[j++] = i;
5140:       }
5141:     }
5142:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5143:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5144:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5145:     ISDestroy(&is_aux1);
5146:     ISDestroy(&is_aux2);

5148:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* positions within R of the marked dofs; vertices are not part of R, so these are
               presumably exactly the interior dofs - TODO confirm */
5149:       PetscMalloc1(n_D,&aux_array1);
5150:       for (i=0, j=0; i<n_R; i++) {
5151:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5152:           aux_array1[j++] = i;
5153:         }
5154:       }
5155:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5156:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5157:       ISDestroy(&is_aux1);
5158:     }
5159:     PetscBTDestroy(&bitmask);
5160:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5161:   } else {
5162:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5163:     IS                 tis;
5164:     PetscInt           schur_size;

          /* entries n_D..n_D+schur_size-1 of vec1_R are scattered to the positions of vec1_B listed in reuse_solver->is_B */
5166:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5167:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5168:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5169:     ISDestroy(&tis);
5170:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* the first n_D entries of vec1_R are scattered to vec1_D */
5171:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5172:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5173:       ISDestroy(&tis);
5174:     }
5175:   }
5176:   return(0);
5177: }


5180: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5181: {
5182:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5183:   PC_IS          *pcis = (PC_IS*)pc->data;
5184:   PC             pc_temp;
5185:   Mat            A_RR;
5186:   MatReuse       reuse;
5187:   PetscScalar    m_one = -1.0;
5188:   PetscReal      value;
5189:   PetscInt       n_D,n_R;
5190:   PetscBool      check_corr,issbaij;
5192:   /* prefixes stuff */
5193:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5194:   size_t         len;


5198:   /* compute prefixes */
5199:   PetscStrcpy(dir_prefix,"");
5200:   PetscStrcpy(neu_prefix,"");
5201:   if (!pcbddc->current_level) {
5202:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5203:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5204:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5205:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5206:   } else {
5207:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5208:     PetscStrlen(((PetscObject)pc)->prefix,&len);
5209:     len -= 15; /* remove "pc_bddc_coarse_" */
5210:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5211:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5212:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5213:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5214:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5215:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5216:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5217:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5218:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5219:   }

5221:   /* DIRICHLET PROBLEM */
5222:   if (dirichlet) {
5223:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5224:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5225:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
5226:       if (pcbddc->dbg_flag) {
5227:         Mat    A_IIn;

5229:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5230:         MatDestroy(&pcis->A_II);
5231:         pcis->A_II = A_IIn;
5232:       }
5233:     }
5234:     if (pcbddc->local_mat->symmetric_set) {
5235:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5236:     }
5237:     /* Matrix for Dirichlet problem is pcis->A_II */
5238:     n_D = pcis->n - pcis->n_B;
5239:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5240:       void (*f)(void) = 0;

5242:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5243:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5244:       /* default */
5245:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5246:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5247:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
5248:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5249:       if (issbaij) {
5250:         PCSetType(pc_temp,PCCHOLESKY);
5251:       } else {
5252:         PCSetType(pc_temp,PCLU);
5253:       }
5254:       /* Allow user's customization */
5255:       KSPSetFromOptions(pcbddc->ksp_D);
5256:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5257:       if (f && pcbddc->mat_graph->cloc) {
5258:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5259:         const PetscInt *idxs;
5260:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5262:         ISGetLocalSize(pcis->is_I_local,&nl);
5263:         ISGetIndices(pcis->is_I_local,&idxs);
5264:         PetscMalloc1(nl*cdim,&scoords);
5265:         for (i=0;i<nl;i++) {
5266:           for (d=0;d<cdim;d++) {
5267:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5268:           }
5269:         }
5270:         ISRestoreIndices(pcis->is_I_local,&idxs);
5271:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5272:         PetscFree(scoords);
5273:       }
5274:     }
5275:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
5276:     if (sub_schurs && sub_schurs->reuse_solver) {
5277:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5279:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5280:     }
5281:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5282:     if (!n_D) {
5283:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5284:       PCSetType(pc_temp,PCNONE);
5285:     }
5286:     /* set ksp_D into pcis data */
5287:     KSPDestroy(&pcis->ksp_D);
5288:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5289:     pcis->ksp_D = pcbddc->ksp_D;
5290:   }

5292:   /* NEUMANN PROBLEM */
5293:   A_RR = 0;
5294:   if (neumann) {
5295:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5296:     PetscInt        ibs,mbs;
5297:     PetscBool       issbaij, reuse_neumann_solver;
5298:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

5300:     reuse_neumann_solver = PETSC_FALSE;
5301:     if (sub_schurs && sub_schurs->reuse_solver) {
5302:       IS iP;

5304:       reuse_neumann_solver = PETSC_TRUE;
5305:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5306:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5307:     }
5308:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5309:     ISGetSize(pcbddc->is_R_local,&n_R);
5310:     if (pcbddc->ksp_R) { /* already created ksp */
5311:       PetscInt nn_R;
5312:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5313:       PetscObjectReference((PetscObject)A_RR);
5314:       MatGetSize(A_RR,&nn_R,NULL);
5315:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5316:         KSPReset(pcbddc->ksp_R);
5317:         MatDestroy(&A_RR);
5318:         reuse = MAT_INITIAL_MATRIX;
5319:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5320:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5321:           MatDestroy(&A_RR);
5322:           reuse = MAT_INITIAL_MATRIX;
5323:         } else { /* safe to reuse the matrix */
5324:           reuse = MAT_REUSE_MATRIX;
5325:         }
5326:       }
5327:       /* last check */
5328:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5329:         MatDestroy(&A_RR);
5330:         reuse = MAT_INITIAL_MATRIX;
5331:       }
5332:     } else { /* first time, so we need to create the matrix */
5333:       reuse = MAT_INITIAL_MATRIX;
5334:     }
5335:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5336:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5337:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5338:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5339:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5340:       if (matis->A == pcbddc->local_mat) {
5341:         MatDestroy(&pcbddc->local_mat);
5342:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5343:       } else {
5344:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5345:       }
5346:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5347:       if (matis->A == pcbddc->local_mat) {
5348:         MatDestroy(&pcbddc->local_mat);
5349:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5350:       } else {
5351:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5352:       }
5353:     }
5354:     /* extract A_RR */
5355:     if (reuse_neumann_solver) {
5356:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5358:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5359:         MatDestroy(&A_RR);
5360:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5361:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5362:         } else {
5363:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5364:         }
5365:       } else {
5366:         MatDestroy(&A_RR);
5367:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5368:         PetscObjectReference((PetscObject)A_RR);
5369:       }
5370:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5371:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5372:     }
5373:     if (pcbddc->local_mat->symmetric_set) {
5374:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5375:     }
5376:     if (!pcbddc->ksp_R) { /* create object if not present */
5377:       void (*f)(void) = 0;

5379:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5380:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5381:       /* default */
5382:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5383:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5384:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5385:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5386:       if (issbaij) {
5387:         PCSetType(pc_temp,PCCHOLESKY);
5388:       } else {
5389:         PCSetType(pc_temp,PCLU);
5390:       }
5391:       /* Allow user's customization */
5392:       KSPSetFromOptions(pcbddc->ksp_R);
5393:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5394:       if (f && pcbddc->mat_graph->cloc) {
5395:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5396:         const PetscInt *idxs;
5397:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5399:         ISGetLocalSize(pcbddc->is_R_local,&nl);
5400:         ISGetIndices(pcbddc->is_R_local,&idxs);
5401:         PetscMalloc1(nl*cdim,&scoords);
5402:         for (i=0;i<nl;i++) {
5403:           for (d=0;d<cdim;d++) {
5404:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5405:           }
5406:         }
5407:         ISRestoreIndices(pcbddc->is_R_local,&idxs);
5408:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5409:         PetscFree(scoords);
5410:       }
5411:     }
5412:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5413:     if (!n_R) {
5414:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5415:       PCSetType(pc_temp,PCNONE);
5416:     }
5417:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5418:     /* Reuse solver if it is present */
5419:     if (reuse_neumann_solver) {
5420:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5422:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5423:     }
5424:   }

5426:   if (pcbddc->dbg_flag) {
5427:     PetscViewerFlush(pcbddc->dbg_viewer);
5428:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5429:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5430:   }

5432:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5433:   check_corr = PETSC_FALSE;
5434:   if (pcbddc->NullSpace_corr[0]) {
5435:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5436:   }
5437:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5438:     check_corr = PETSC_TRUE;
5439:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5440:   }
5441:   if (neumann && pcbddc->NullSpace_corr[2]) {
5442:     check_corr = PETSC_TRUE;
5443:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5444:   }
5445:   /* check Dirichlet and Neumann solvers */
5446:   if (pcbddc->dbg_flag) {
5447:     if (dirichlet) { /* Dirichlet */
5448:       VecSetRandom(pcis->vec1_D,NULL);
5449:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5450:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5451:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5452:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5453:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5454:       if (check_corr) {
5455:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5456:       }
5457:       PetscViewerFlush(pcbddc->dbg_viewer);
5458:     }
5459:     if (neumann) { /* Neumann */
5460:       VecSetRandom(pcbddc->vec1_R,NULL);
5461:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5462:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5463:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5464:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5465:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5466:       if (check_corr) {
5467:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5468:       }
5469:       PetscViewerFlush(pcbddc->dbg_viewer);
5470:     }
5471:   }
5472:   /* free Neumann problem's matrix */
5473:   MatDestroy(&A_RR);
5474:   return(0);
5475: }

/*
   PCBDDCSolveSubstructureCorrection - local correction solve, performed in place on the given vectors.

   Input Parameters:
+  pc             - the preconditioner; its data pointer is read as a PC_BDDC
.  inout_B        - on entry the right-hand side; it is zeroed after the solve and then overwritten with the result
.  inout_D        - only read and written when pcbddc->switch_static is set
-  applytranspose - selects KSPSolveTranspose/MatFactorSolveSchurComplementTranspose instead of the forward solves,
                    and moves the local_auxmat1/local_auxmat2 products from after the solve to before it

   Work vectors touched: pcbddc->vec1_R, pcbddc->vec1_C and, when solvers are reused, reuse_solver->rhs_B/sol_B.
   Returns 0.
*/
5477: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5478: {
5479:   PetscErrorCode  ierr;
5480:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5481:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* true only when sub_schurs exists and carries a reuse_solver object; note that inner blocks below
           declare a PCBDDCReuseSolvers with the same name, shadowing this boolean */
5482:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* presumably needed because, without switch_static, only the entries reached from inout_B are filled below */
5485:   if (!reuse_solver) {
5486:     VecSet(pcbddc->vec1_R,0.);
5487:   }
        /* ---- load the right-hand side ---- */
5488:   if (!pcbddc->switch_static) {
5489:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B) */
5490:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5491:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5492:     }
5493:     if (!reuse_solver) {
            /* R_to_B applied in reverse: inout_B -> vec1_R */
5494:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5495:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5496:     } else {
5497:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* reused solver: inout_B -> reuse_solver->rhs_B */
5499:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5500:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5501:     }
5502:   } else {
          /* switch_static: both inout_B and inout_D are inserted into vec1_R */
5503:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5504:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5505:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5506:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5507:     if (applytranspose && pcbddc->local_auxmat1) {
            /* here local_auxmat2^T acts on the whole vec1_R; the updated inout_B is then re-inserted into vec1_R */
5508:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5509:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5510:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5511:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5512:     }
5513:   }
        /* ---- solve: in place on vec1_R with ksp_R, unless a reused solver is available and switch_static is off ---- */
5514:   if (!reuse_solver || pcbddc->switch_static) {
5515:     if (applytranspose) {
5516:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5517:     } else {
5518:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5519:     }
5520:   } else {
5521:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* reused solver: rhs_B -> sol_B through the factored matrix reuse_solver->F */
5523:     if (applytranspose) {
5524:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5525:     } else {
5526:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5527:     }
5528:   }
        /* ---- unload the solution; inout_B is cleared before values are inserted back ---- */
5529:   VecSet(inout_B,0.);
5530:   if (!pcbddc->switch_static) {
5531:     if (!reuse_solver) {
5532:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5533:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5534:     } else {
5535:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5537:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5538:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5539:     }
5540:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat2 * (local_auxmat1 * inout_B) */
5541:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5542:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5543:     }
5544:   } else {
5545:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5546:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5547:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5548:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5549:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* the correction is accumulated into vec1_R, so the scatters below run again to refresh inout_B/inout_D */
5550:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5551:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5552:     }
5553:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5554:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5555:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5556:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5557:   }
5558:   return(0);
5559: }

5561: /* parameter applytranspose determines if the interface preconditioner should be applied transposed or not */
/*
   PCBDDCApplyInterfacePreconditioner - applies the coarse and local corrections in place.

   Input Parameters:
+  pc             - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
                    (NOTE(review): this presumably relies on PC_BDDC beginning with a PC_IS -- confirm in bddc.h)
-  applytranspose - swaps the roles of coarse_phi_* and coarse_psi_* and selects the transposed solves

   Data flow visible in this routine:
   - input is read from pcis->vec1_B (and pcis->vec1_D when pcbddc->switch_static is set);
   - the result is accumulated into (or, with benign_apply_coarse_only, written to) the same vectors;
   - pcbddc->vec1_P, pcbddc->coarse_vec and pcbddc->benign_p0 are used as work/auxiliary storage.

   Raises PETSC_ERR_SUP when applytranspose is requested together with benign_apply_coarse_only
   on a process that holds the coarse solver. Returns 0 otherwise.
*/
5562: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5563: {
5565:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5566:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5567:   const PetscScalar zero = 0.0;

5570:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
        /* vec1_P = X_B^T vec1_B (+ X_D^T vec1_D with switch_static), X = phi when transposing, psi otherwise;
           vec1_P is simply zeroed when only the coarse part is applied */
5571:   if (!pcbddc->benign_apply_coarse_only) {
5572:     if (applytranspose) {
5573:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5574:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5575:     } else {
5576:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5577:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5578:     }
5579:   } else {
5580:     VecSet(pcbddc->vec1_P,zero);
5581:   }

5583:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
        /* the benign_n values of benign_p0 are added to the trailing benign_n entries of vec1_P */
5584:   if (pcbddc->benign_n) {
5585:     PetscScalar *array;
5586:     PetscInt    j;

5588:     VecGetArray(pcbddc->vec1_P,&array);
5589:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5590:     VecRestoreArray(pcbddc->vec1_P,&array);
5591:   }

5593:   /* start communications from local primal nodes to rhs of coarse solver */
        /* coarse_vec is zeroed first because the forward scatter uses ADD_VALUES */
5594:   VecSet(pcbddc->coarse_vec,zero);
5595:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5596:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5598:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
        /* only executed where coarse_ksp is set (presumably the processes taking part in the coarse solve) */
5599:   if (pcbddc->coarse_ksp) {
5600:     Mat          coarse_mat;
5601:     Vec          rhs,sol;
5602:     MatNullSpace nullsp;
5603:     PetscBool    isbddc = PETSC_FALSE;

5605:     if (pcbddc->benign_have_null) {
5606:       PC        coarse_pc;

5608:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5609:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5610:       /* we need to propagate to coarser levels the need for a possible benign correction */
5611:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5612:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5613:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5614:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5615:       }
5616:     }
5617:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5618:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5619:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
          /* if the coarse operator has a null space attached, it is removed from rhs here and from sol after the solve */
5620:     MatGetNullSpace(coarse_mat,&nullsp);
5621:     if (nullsp) {
5622:       MatNullSpaceRemove(nullsp,rhs);
5623:     }
5624:     if (applytranspose) {
5625:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5626:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5627:     } else {
5628:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5629:         PC        coarse_pc;

              /* the KSP solve is bypassed: PCBDDCBenignRemoveInterior is called on the coarse PC between PCPreSolve and PCPostSolve */
5631:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5632:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5633:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5634:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5635:       } else {
5636:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5637:       }
5638:     }
5639:     /* we don't need the benign correction at coarser levels anymore */
          /* the coarse PC flags are set to skip_correction = TRUE, apply_coarse_only = FALSE regardless of their values before this call */
5640:     if (pcbddc->benign_have_null && isbddc) {
5641:       PC        coarse_pc;
5642:       PC_BDDC*  coarsepcbddc;

5644:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5645:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5646:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5647:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5648:     }
5649:     if (nullsp) {
5650:       MatNullSpaceRemove(nullsp,sol);
5651:     }
5652:   }

5654:   /* Local solution on R nodes */
        /* skipped when pcis->n is zero or when only the coarse part is requested */
5655:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5656:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5657:   }
5658:   /* communications from coarse sol to local primal nodes */
5659:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5660:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5662:   /* Sum contributions from the two levels */
        /* vec1_B += X_B vec1_P (and vec1_D += X_D vec1_P with switch_static), X = psi when transposing, phi otherwise */
5663:   if (!pcbddc->benign_apply_coarse_only) {
5664:     if (applytranspose) {
5665:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5666:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5667:     } else {
5668:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5669:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5670:     }
5671:     /* store p0 */
          /* the trailing benign_n entries of vec1_P are copied back into benign_p0 */
5672:     if (pcbddc->benign_n) {
5673:       PetscScalar *array;
5674:       PetscInt    j;

5676:       VecGetArray(pcbddc->vec1_P,&array);
5677:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5678:       VecRestoreArray(pcbddc->vec1_P,&array);
5679:     }
5680:   } else { /* expand the coarse solution */
          /* vec1_B is overwritten (MatMult, not MatMultAdd); vec1_D is not touched in this branch */
5681:     if (applytranspose) {
5682:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5683:     } else {
5684:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5685:     }
5686:   }
5687:   return(0);
5688: }

/*
   PCBDDCScatterCoarseDataBegin - starts the scatter between pcbddc->vec1_P and pcbddc->coarse_vec
   through pcbddc->coarse_loc_to_glob.

   Input Parameters:
+  pc    - the preconditioner; its data pointer is read as a PC_BDDC
.  imode - insert mode forwarded to VecScatterBegin
-  smode - SCATTER_FORWARD moves vec1_P -> coarse_vec, SCATTER_REVERSE moves coarse_vec -> vec1_P

   In reverse mode, where pcbddc->coarse_ksp is set, coarse_vec is made to alias the storage of the
   coarse KSP solution via VecPlaceArray; PCBDDCScatterCoarseDataEnd undoes this with VecResetArray,
   so the two routines must be called as a pair with the same smode. Returns 0.
*/
5690: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5691: {
5693:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5694:   PetscScalar    *array;
5695:   Vec            from,to;

5698:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5699:     from = pcbddc->coarse_vec;
5700:     to = pcbddc->vec1_P;
5701:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5702:       Vec tvec;

            /* release the array placed into the KSP right-hand side by the forward PCBDDCScatterCoarseDataEnd */
5704:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5705:       VecResetArray(tvec);
            /* tvec is reused: it now refers to the KSP solution, whose array is placed into coarse_vec */
5706:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5707:       VecGetArray(tvec,&array);
5708:       VecPlaceArray(from,array);
5709:       VecRestoreArray(tvec,&array);
5710:     }
5711:   } else { /* from local to global -> put data in coarse right hand side */
5712:     from = pcbddc->vec1_P;
5713:     to = pcbddc->coarse_vec;
5714:   }
5715:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5716:   return(0);
5717: }

/*
   PCBDDCScatterCoarseDataEnd - completes the scatter started by PCBDDCScatterCoarseDataBegin.

   Input Parameters:
+  pc    - the preconditioner; its data pointer is read as a PC_BDDC
.  imode - insert mode forwarded to VecScatterEnd
-  smode - SCATTER_FORWARD moves vec1_P -> coarse_vec, SCATTER_REVERSE moves coarse_vec -> vec1_P

   Where pcbddc->coarse_ksp is set:
   - forward mode: the array of coarse_vec is placed into the coarse KSP right-hand side vector
     (released later by the reverse-mode PCBDDCScatterCoarseDataBegin);
   - reverse mode: the array placed into coarse_vec by PCBDDCScatterCoarseDataBegin is released.
   Returns 0.
*/
5719: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5720: {
5722:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5723:   PetscScalar    *array;
5724:   Vec            from,to;

        /* same from/to selection as in PCBDDCScatterCoarseDataBegin */
5727:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5728:     from = pcbddc->coarse_vec;
5729:     to = pcbddc->vec1_P;
5730:   } else { /* from local to global -> put data in coarse right hand side */
5731:     from = pcbddc->vec1_P;
5732:     to = pcbddc->coarse_vec;
5733:   }
5734:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5735:   if (smode == SCATTER_FORWARD) {
5736:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5737:       Vec tvec;

            /* the KSP right-hand side now shares the storage of coarse_vec (to) */
5739:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5740:       VecGetArray(to,&array);
5741:       VecPlaceArray(tvec,array);
5742:       VecRestoreArray(to,&array);
5743:     }
5744:   } else {
5745:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5746:      VecResetArray(from);
5747:     }
5748:   }
5749:   return(0);
5750: }

5752: /* uncomment for testing purposes */
5753: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5754: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5755: {
5756:   PetscErrorCode    ierr;
5757:   PC_IS*            pcis = (PC_IS*)(pc->data);
5758:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5759:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5760:   /* one and zero */
5761:   PetscScalar       one=1.0,zero=0.0;
5762:   /* space to store constraints and their local indices */
5763:   PetscScalar       *constraints_data;
5764:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5765:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5766:   PetscInt          *constraints_n;
5767:   /* iterators */
5768:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5769:   /* BLAS integers */
5770:   PetscBLASInt      lwork,lierr;
5771:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5772:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5773:   /* reuse */
5774:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5775:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5776:   /* change of basis */
5777:   PetscBool         qr_needed;
5778:   PetscBT           change_basis,qr_needed_idx;
5779:   /* auxiliary stuff */
5780:   PetscInt          *nnz,*is_indices;
5781:   PetscInt          ncc;
5782:   /* some quantities */
5783:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5784:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5785:   PetscReal         tol; /* tolerance for retaining eigenmodes */

5788:   tol  = PetscSqrtReal(PETSC_SMALL);
5789:   /* Destroy Mat objects computed previously */
5790:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5791:   MatDestroy(&pcbddc->ConstraintMatrix);
5792:   MatDestroy(&pcbddc->switch_static_change);
5793:   /* save info on constraints from previous setup (if any) */
5794:   olocal_primal_size = pcbddc->local_primal_size;
5795:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5796:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5797:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5798:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5799:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5800:   PetscFree(pcbddc->primal_indices_local_idxs);

5802:   if (!pcbddc->adaptive_selection) {
5803:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5804:     MatNullSpace nearnullsp;
5805:     const Vec    *nearnullvecs;
5806:     Vec          *localnearnullsp;
5807:     PetscScalar  *array;
5808:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5809:     PetscBool    nnsp_has_cnst;
5810:     /* LAPACK working arrays for SVD or POD */
5811:     PetscBool    skip_lapack,boolforchange;
5812:     PetscScalar  *work;
5813:     PetscReal    *singular_vals;
5814: #if defined(PETSC_USE_COMPLEX)
5815:     PetscReal    *rwork;
5816: #endif
5817: #if defined(PETSC_MISSING_LAPACK_GESVD)
5818:     PetscScalar  *temp_basis,*correlation_mat;
5819: #else
5820:     PetscBLASInt dummy_int=1;
5821:     PetscScalar  dummy_scalar=1.;
5822: #endif

5824:     /* Get index sets for faces, edges and vertices from graph */
5825:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5826:     /* print some info */
5827:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5828:       PetscInt nv;

5830:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5831:       ISGetSize(ISForVertices,&nv);
5832:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5833:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5834:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5835:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5836:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5837:       PetscViewerFlush(pcbddc->dbg_viewer);
5838:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5839:     }

5841:     /* free unneeded index sets */
5842:     if (!pcbddc->use_vertices) {
5843:       ISDestroy(&ISForVertices);
5844:     }
5845:     if (!pcbddc->use_edges) {
5846:       for (i=0;i<n_ISForEdges;i++) {
5847:         ISDestroy(&ISForEdges[i]);
5848:       }
5849:       PetscFree(ISForEdges);
5850:       n_ISForEdges = 0;
5851:     }
5852:     if (!pcbddc->use_faces) {
5853:       for (i=0;i<n_ISForFaces;i++) {
5854:         ISDestroy(&ISForFaces[i]);
5855:       }
5856:       PetscFree(ISForFaces);
5857:       n_ISForFaces = 0;
5858:     }

5860:     /* check if near null space is attached to global mat */
5861:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
5862:     if (nearnullsp) {
5863:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5864:       /* remove any stored info */
5865:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5866:       PetscFree(pcbddc->onearnullvecs_state);
5867:       /* store information for BDDC solver reuse */
5868:       PetscObjectReference((PetscObject)nearnullsp);
5869:       pcbddc->onearnullspace = nearnullsp;
5870:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5871:       for (i=0;i<nnsp_size;i++) {
5872:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5873:       }
5874:     } else { /* if near null space is not provided BDDC uses constants by default */
5875:       nnsp_size = 0;
5876:       nnsp_has_cnst = PETSC_TRUE;
5877:     }
5878:     /* get max number of constraints on a single cc */
5879:     max_constraints = nnsp_size;
5880:     if (nnsp_has_cnst) max_constraints++;

5882:     /*
5883:          Evaluate maximum storage size needed by the procedure
5884:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5885:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5886:          There can be multiple constraints per connected component
5887:                                                                                                                                                            */
5888:     n_vertices = 0;
5889:     if (ISForVertices) {
5890:       ISGetSize(ISForVertices,&n_vertices);
5891:     }
5892:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5893:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

5895:     total_counts = n_ISForFaces+n_ISForEdges;
5896:     total_counts *= max_constraints;
5897:     total_counts += n_vertices;
5898:     PetscBTCreate(total_counts,&change_basis);

5900:     total_counts = 0;
5901:     max_size_of_constraint = 0;
5902:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5903:       IS used_is;
5904:       if (i<n_ISForEdges) {
5905:         used_is = ISForEdges[i];
5906:       } else {
5907:         used_is = ISForFaces[i-n_ISForEdges];
5908:       }
5909:       ISGetSize(used_is,&j);
5910:       total_counts += j;
5911:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5912:     }
5913:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

5915:     /* get local part of global near null space vectors */
5916:     PetscMalloc1(nnsp_size,&localnearnullsp);
5917:     for (k=0;k<nnsp_size;k++) {
5918:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5919:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5920:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5921:     }

5923:     /* whether or not to skip lapack calls */
5924:     skip_lapack = PETSC_TRUE;
5925:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5927:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5928:     if (!skip_lapack) {
5929:       PetscScalar temp_work;

5931: #if defined(PETSC_MISSING_LAPACK_GESVD)
5932:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5933:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5934:       PetscMalloc1(max_constraints,&singular_vals);
5935:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5936: #if defined(PETSC_USE_COMPLEX)
5937:       PetscMalloc1(3*max_constraints,&rwork);
5938: #endif
5939:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5940:       PetscBLASIntCast(max_constraints,&Blas_N);
5941:       PetscBLASIntCast(max_constraints,&Blas_LDA);
5942:       lwork = -1;
5943:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5944: #if !defined(PETSC_USE_COMPLEX)
5945:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5946: #else
5947:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5948: #endif
5949:       PetscFPTrapPop();
5950:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5951: #else /* on missing GESVD */
5952:       /* SVD */
5953:       PetscInt max_n,min_n;
5954:       max_n = max_size_of_constraint;
5955:       min_n = max_constraints;
5956:       if (max_size_of_constraint < max_constraints) {
5957:         min_n = max_size_of_constraint;
5958:         max_n = max_constraints;
5959:       }
5960:       PetscMalloc1(min_n,&singular_vals);
5961: #if defined(PETSC_USE_COMPLEX)
5962:       PetscMalloc1(5*min_n,&rwork);
5963: #endif
5964:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5965:       lwork = -1;
5966:       PetscBLASIntCast(max_n,&Blas_M);
5967:       PetscBLASIntCast(min_n,&Blas_N);
5968:       PetscBLASIntCast(max_n,&Blas_LDA);
5969:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5970: #if !defined(PETSC_USE_COMPLEX)
5971:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5972: #else
5973:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5974: #endif
5975:       PetscFPTrapPop();
5976:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5977: #endif /* on missing GESVD */
5978:       /* Allocate optimal workspace */
5979:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5980:       PetscMalloc1(lwork,&work);
5981:     }
5982:     /* Now we can loop on constraining sets */
5983:     total_counts = 0;
5984:     constraints_idxs_ptr[0] = 0;
5985:     constraints_data_ptr[0] = 0;
5986:     /* vertices */
5987:     if (n_vertices) {
5988:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5989:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5990:       for (i=0;i<n_vertices;i++) {
5991:         constraints_n[total_counts] = 1;
5992:         constraints_data[total_counts] = 1.0;
5993:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5994:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5995:         total_counts++;
5996:       }
5997:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5998:       n_vertices = total_counts;
5999:     }

6001:     /* edges and faces */
6002:     total_counts_cc = total_counts;
6003:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6004:       IS        used_is;
6005:       PetscBool idxs_copied = PETSC_FALSE;

6007:       if (ncc<n_ISForEdges) {
6008:         used_is = ISForEdges[ncc];
6009:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6010:       } else {
6011:         used_is = ISForFaces[ncc-n_ISForEdges];
6012:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6013:       }
6014:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6016:       ISGetSize(used_is,&size_of_constraint);
6017:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6018:       /* change of basis should not be performed on local periodic nodes */
6019:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6020:       if (nnsp_has_cnst) {
6021:         PetscScalar quad_value;

6023:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6024:         idxs_copied = PETSC_TRUE;

6026:         if (!pcbddc->use_nnsp_true) {
6027:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6028:         } else {
6029:           quad_value = 1.0;
6030:         }
6031:         for (j=0;j<size_of_constraint;j++) {
6032:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6033:         }
6034:         temp_constraints++;
6035:         total_counts++;
6036:       }
6037:       for (k=0;k<nnsp_size;k++) {
6038:         PetscReal real_value;
6039:         PetscScalar *ptr_to_data;

6041:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6042:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6043:         for (j=0;j<size_of_constraint;j++) {
6044:           ptr_to_data[j] = array[is_indices[j]];
6045:         }
6046:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6047:         /* check if array is null on the connected component */
6048:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6049:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6050:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6051:           temp_constraints++;
6052:           total_counts++;
6053:           if (!idxs_copied) {
6054:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6055:             idxs_copied = PETSC_TRUE;
6056:           }
6057:         }
6058:       }
6059:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6060:       valid_constraints = temp_constraints;
6061:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6062:         if (temp_constraints == 1) { /* just normalize the constraint */
6063:           PetscScalar norm,*ptr_to_data;

6065:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6066:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6067:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6068:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6069:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6070:         } else { /* perform SVD */
6071:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6073: #if defined(PETSC_MISSING_LAPACK_GESVD)
6074:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6075:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6076:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6077:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6078:                 from that computed using LAPACKgesvd
6079:              -> This is due to a different computation of eigenvectors in LAPACKheev
6080:              -> The quality of the POD-computed basis will be the same */
6081:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6082:           /* Store upper triangular part of correlation matrix */
6083:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6084:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6085:           for (j=0;j<temp_constraints;j++) {
6086:             for (k=0;k<j+1;k++) {
6087:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6088:             }
6089:           }
6090:           /* compute eigenvalues and eigenvectors of correlation matrix */
6091:           PetscBLASIntCast(temp_constraints,&Blas_N);
6092:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
6093: #if !defined(PETSC_USE_COMPLEX)
6094:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6095: #else
6096:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6097: #endif
6098:           PetscFPTrapPop();
6099:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6100:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6101:           j = 0;
6102:           while (j < temp_constraints && singular_vals[j] < tol) j++;
6103:           total_counts = total_counts-j;
6104:           valid_constraints = temp_constraints-j;
6105:           /* scale and copy POD basis into used quadrature memory */
6106:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6107:           PetscBLASIntCast(temp_constraints,&Blas_N);
6108:           PetscBLASIntCast(temp_constraints,&Blas_K);
6109:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6110:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
6111:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6112:           if (j<temp_constraints) {
6113:             PetscInt ii;
6114:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6115:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6116:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6117:             PetscFPTrapPop();
6118:             for (k=0;k<temp_constraints-j;k++) {
6119:               for (ii=0;ii<size_of_constraint;ii++) {
6120:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6121:               }
6122:             }
6123:           }
6124: #else  /* on missing GESVD */
6125:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6126:           PetscBLASIntCast(temp_constraints,&Blas_N);
6127:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6128:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6129: #if !defined(PETSC_USE_COMPLEX)
6130:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6131: #else
6132:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6133: #endif
6134:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6135:           PetscFPTrapPop();
6136:           /* retain singular values greater than tol: note that LAPACKgesvd gives singular values in descending order */
6137:           k = temp_constraints;
6138:           if (k > size_of_constraint) k = size_of_constraint;
6139:           j = 0;
6140:           while (j < k && singular_vals[k-j-1] < tol) j++;
6141:           valid_constraints = k-j;
6142:           total_counts = total_counts-temp_constraints+valid_constraints;
6143: #endif /* on missing GESVD */
6144:         }
6145:       }
6146:       /* update pointers information */
6147:       if (valid_constraints) {
6148:         constraints_n[total_counts_cc] = valid_constraints;
6149:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6150:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6151:         /* set change_of_basis flag */
6152:         if (boolforchange) {
6153:           PetscBTSet(change_basis,total_counts_cc);
6154:         }
6155:         total_counts_cc++;
6156:       }
6157:     }
6158:     /* free workspace */
6159:     if (!skip_lapack) {
6160:       PetscFree(work);
6161: #if defined(PETSC_USE_COMPLEX)
6162:       PetscFree(rwork);
6163: #endif
6164:       PetscFree(singular_vals);
6165: #if defined(PETSC_MISSING_LAPACK_GESVD)
6166:       PetscFree(correlation_mat);
6167:       PetscFree(temp_basis);
6168: #endif
6169:     }
6170:     for (k=0;k<nnsp_size;k++) {
6171:       VecDestroy(&localnearnullsp[k]);
6172:     }
6173:     PetscFree(localnearnullsp);
6174:     /* free index sets of faces, edges and vertices */
6175:     for (i=0;i<n_ISForFaces;i++) {
6176:       ISDestroy(&ISForFaces[i]);
6177:     }
6178:     if (n_ISForFaces) {
6179:       PetscFree(ISForFaces);
6180:     }
6181:     for (i=0;i<n_ISForEdges;i++) {
6182:       ISDestroy(&ISForEdges[i]);
6183:     }
6184:     if (n_ISForEdges) {
6185:       PetscFree(ISForEdges);
6186:     }
6187:     ISDestroy(&ISForVertices);
6188:   } else {
6189:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6191:     total_counts = 0;
6192:     n_vertices = 0;
6193:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6194:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6195:     }
6196:     max_constraints = 0;
6197:     total_counts_cc = 0;
6198:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6199:       total_counts += pcbddc->adaptive_constraints_n[i];
6200:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6201:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6202:     }
6203:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6204:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6205:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6206:     constraints_data = pcbddc->adaptive_constraints_data;
6207:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6208:     PetscMalloc1(total_counts_cc,&constraints_n);
6209:     total_counts_cc = 0;
6210:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6211:       if (pcbddc->adaptive_constraints_n[i]) {
6212:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6213:       }
6214:     }
6215: #if 0
6216:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
6217:     for (i=0;i<total_counts_cc;i++) {
6218:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
6219:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
6220:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
6221:         printf(" %d",constraints_idxs[j]);
6222:       }
6223:       printf("\n");
6224:       printf("number of cc: %d\n",constraints_n[i]);
6225:     }
6226:     for (i=0;i<n_vertices;i++) {
6227:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
6228:     }
6229:     for (i=0;i<sub_schurs->n_subs;i++) {
6230:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
6231:     }
6232: #endif

6234:     max_size_of_constraint = 0;
6235:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6236:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6237:     /* Change of basis */
6238:     PetscBTCreate(total_counts_cc,&change_basis);
6239:     if (pcbddc->use_change_of_basis) {
6240:       for (i=0;i<sub_schurs->n_subs;i++) {
6241:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6242:           PetscBTSet(change_basis,i+n_vertices);
6243:         }
6244:       }
6245:     }
6246:   }
6247:   pcbddc->local_primal_size = total_counts;
6248:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6250:   /* map constraints_idxs in boundary numbering */
6251:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6252:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);

6254:   /* Create constraint matrix */
6255:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6256:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6257:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6259:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6260:   /* determine if a QR strategy is needed for change of basis */
6261:   qr_needed = PETSC_FALSE;
6262:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6263:   total_primal_vertices=0;
6264:   pcbddc->local_primal_size_cc = 0;
6265:   for (i=0;i<total_counts_cc;i++) {
6266:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6267:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6268:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6269:       pcbddc->local_primal_size_cc += 1;
6270:     } else if (PetscBTLookup(change_basis,i)) {
6271:       for (k=0;k<constraints_n[i];k++) {
6272:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6273:       }
6274:       pcbddc->local_primal_size_cc += constraints_n[i];
6275:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6276:         PetscBTSet(qr_needed_idx,i);
6277:         qr_needed = PETSC_TRUE;
6278:       }
6279:     } else {
6280:       pcbddc->local_primal_size_cc += 1;
6281:     }
6282:   }
6283:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6284:   pcbddc->n_vertices = total_primal_vertices;
6285:   /* permute indices in order to have a sorted set of vertices */
6286:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6287:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6288:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6289:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6291:   /* nonzero structure of constraint matrix */
6292:   /* and get reference dof for local constraints */
6293:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6294:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6296:   j = total_primal_vertices;
6297:   total_counts = total_primal_vertices;
6298:   cum = total_primal_vertices;
6299:   for (i=n_vertices;i<total_counts_cc;i++) {
6300:     if (!PetscBTLookup(change_basis,i)) {
6301:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6302:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6303:       cum++;
6304:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6305:       for (k=0;k<constraints_n[i];k++) {
6306:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6307:         nnz[j+k] = size_of_constraint;
6308:       }
6309:       j += constraints_n[i];
6310:     }
6311:   }
6312:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6313:   PetscFree(nnz);

6315:   /* set values in constraint matrix */
6316:   for (i=0;i<total_primal_vertices;i++) {
6317:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6318:   }
6319:   total_counts = total_primal_vertices;
6320:   for (i=n_vertices;i<total_counts_cc;i++) {
6321:     if (!PetscBTLookup(change_basis,i)) {
6322:       PetscInt *cols;

6324:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6325:       cols = constraints_idxs+constraints_idxs_ptr[i];
6326:       for (k=0;k<constraints_n[i];k++) {
6327:         PetscInt    row = total_counts+k;
6328:         PetscScalar *vals;

6330:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6331:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6332:       }
6333:       total_counts += constraints_n[i];
6334:     }
6335:   }
6336:   /* assembling */
6337:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6338:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6339:   MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);
6340:   MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);
6341:   MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");

6343:   /*
6344:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6345:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6346:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6347:   */
6348:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6349:   if (pcbddc->use_change_of_basis) {
6350:     /* dual and primal dofs on a single cc */
6351:     PetscInt     dual_dofs,primal_dofs;
6352:     /* working stuff for GEQRF */
6353:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6354:     PetscBLASInt lqr_work;
6355:     /* working stuff for UNGQR */
6356:     PetscScalar  *gqr_work,lgqr_work_t;
6357:     PetscBLASInt lgqr_work;
6358:     /* working stuff for TRTRS */
6359:     PetscScalar  *trs_rhs;
6360:     PetscBLASInt Blas_NRHS;
6361:     /* pointers for values insertion into change of basis matrix */
6362:     PetscInt     *start_rows,*start_cols;
6363:     PetscScalar  *start_vals;
6364:     /* working stuff for values insertion */
6365:     PetscBT      is_primal;
6366:     PetscInt     *aux_primal_numbering_B;
6367:     /* matrix sizes */
6368:     PetscInt     global_size,local_size;
6369:     /* temporary change of basis */
6370:     Mat          localChangeOfBasisMatrix;
6371:     /* extra space for debugging */
6372:     PetscScalar  *dbg_work;

6374:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6375:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6376:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6377:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6378:     /* nonzeros for local mat */
6379:     PetscMalloc1(pcis->n,&nnz);
6380:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6381:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6382:     } else {
6383:       const PetscInt *ii;
6384:       PetscInt       n;
6385:       PetscBool      flg_row;
6386:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6387:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6388:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6389:     }
6390:     for (i=n_vertices;i<total_counts_cc;i++) {
6391:       if (PetscBTLookup(change_basis,i)) {
6392:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6393:         if (PetscBTLookup(qr_needed_idx,i)) {
6394:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6395:         } else {
6396:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6397:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6398:         }
6399:       }
6400:     }
6401:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6402:     PetscFree(nnz);
6403:     /* Set interior change in the matrix */
6404:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6405:       for (i=0;i<pcis->n;i++) {
6406:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6407:       }
6408:     } else {
6409:       const PetscInt *ii,*jj;
6410:       PetscScalar    *aa;
6411:       PetscInt       n;
6412:       PetscBool      flg_row;
6413:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6414:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6415:       for (i=0;i<n;i++) {
6416:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6417:       }
6418:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6419:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6420:     }

6422:     if (pcbddc->dbg_flag) {
6423:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6424:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6425:     }


6428:     /* Now we loop on the constraints which need a change of basis */
6429:     /*
6430:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6431:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6433:        Basic blocks of change of basis matrix T computed by

6435:          - Using the following block transformation if there is only one primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6437:             | 1        0   ...        0         s_1/S |
6438:             | 0        1   ...        0         s_2/S |
6439:             |              ...                        |
6440:             | 0        ...            1     s_{n-1}/S |
6441:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6443:             with S = \sum_{i=1}^n s_i^2
6444:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6445:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6447:           - QR decomposition of constraints otherwise
6448:     */
6449:     if (qr_needed) {
6450:       /* space to store Q */
6451:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6452:       /* array to store scaling factors for reflectors */
6453:       PetscMalloc1(max_constraints,&qr_tau);
6454:       /* first we issue queries for optimal work */
6455:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6456:       PetscBLASIntCast(max_constraints,&Blas_N);
6457:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6458:       lqr_work = -1;
6459:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6460:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6461:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6462:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6463:       lgqr_work = -1;
6464:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6465:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6466:       PetscBLASIntCast(max_constraints,&Blas_K);
6467:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6468:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6469:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6470:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6471:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6472:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6473:       /* array to store rhs and solution of triangular solver */
6474:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6475:       /* allocating workspace for check */
6476:       if (pcbddc->dbg_flag) {
6477:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6478:       }
6479:     }
6480:     /* array to store whether a node is primal or not */
6481:     PetscBTCreate(pcis->n_B,&is_primal);
6482:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6483:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6484:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6485:     for (i=0;i<total_primal_vertices;i++) {
6486:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6487:     }
6488:     PetscFree(aux_primal_numbering_B);

6490:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6491:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6492:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6493:       if (PetscBTLookup(change_basis,total_counts)) {
6494:         /* get constraint info */
6495:         primal_dofs = constraints_n[total_counts];
6496:         dual_dofs = size_of_constraint-primal_dofs;

6498:         if (pcbddc->dbg_flag) {
6499:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6500:         }

6502:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6504:           /* copy quadrature constraints for change of basis check */
6505:           if (pcbddc->dbg_flag) {
6506:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6507:           }
6508:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6509:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6511:           /* compute QR decomposition of constraints */
6512:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6513:           PetscBLASIntCast(primal_dofs,&Blas_N);
6514:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6515:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6516:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6517:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6518:           PetscFPTrapPop();

6520:           /* explicitly compute R^-T */
6521:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6522:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6523:           PetscBLASIntCast(primal_dofs,&Blas_N);
6524:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6525:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6526:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6527:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6528:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6529:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6530:           PetscFPTrapPop();

6532:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6533:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6534:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6535:           PetscBLASIntCast(primal_dofs,&Blas_K);
6536:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6537:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6538:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6539:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6540:           PetscFPTrapPop();

6542:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6543:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6544:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6545:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6546:           PetscBLASIntCast(primal_dofs,&Blas_N);
6547:           PetscBLASIntCast(primal_dofs,&Blas_K);
6548:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6549:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6550:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6551:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6552:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6553:           PetscFPTrapPop();
6554:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6556:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6557:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6558:           /* insert cols for primal dofs */
6559:           for (j=0;j<primal_dofs;j++) {
6560:             start_vals = &qr_basis[j*size_of_constraint];
6561:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6562:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6563:           }
6564:           /* insert cols for dual dofs */
6565:           for (j=0,k=0;j<dual_dofs;k++) {
6566:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6567:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6568:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6569:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6570:               j++;
6571:             }
6572:           }

6574:           /* check change of basis */
6575:           if (pcbddc->dbg_flag) {
6576:             PetscInt   ii,jj;
6577:             PetscBool valid_qr=PETSC_TRUE;
6578:             PetscBLASIntCast(primal_dofs,&Blas_M);
6579:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6580:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6581:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6582:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6583:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6584:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6585:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6586:             PetscFPTrapPop();
6587:             for (jj=0;jj<size_of_constraint;jj++) {
6588:               for (ii=0;ii<primal_dofs;ii++) {
6589:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6590:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6591:               }
6592:             }
6593:             if (!valid_qr) {
6594:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6595:               for (jj=0;jj<size_of_constraint;jj++) {
6596:                 for (ii=0;ii<primal_dofs;ii++) {
6597:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6598:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6599:                   }
6600:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6601:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6602:                   }
6603:                 }
6604:               }
6605:             } else {
6606:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6607:             }
6608:           }
6609:         } else { /* simple transformation block */
6610:           PetscInt    row,col;
6611:           PetscScalar val,norm;

6613:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6614:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6615:           for (j=0;j<size_of_constraint;j++) {
6616:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6617:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6618:             if (!PetscBTLookup(is_primal,row_B)) {
6619:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6620:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6621:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6622:             } else {
6623:               for (k=0;k<size_of_constraint;k++) {
6624:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6625:                 if (row != col) {
6626:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6627:                 } else {
6628:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6629:                 }
6630:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6631:               }
6632:             }
6633:           }
6634:           if (pcbddc->dbg_flag) {
6635:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6636:           }
6637:         }
6638:       } else {
6639:         if (pcbddc->dbg_flag) {
6640:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6641:         }
6642:       }
6643:     }

6645:     /* free workspace */
6646:     if (qr_needed) {
6647:       if (pcbddc->dbg_flag) {
6648:         PetscFree(dbg_work);
6649:       }
6650:       PetscFree(trs_rhs);
6651:       PetscFree(qr_tau);
6652:       PetscFree(qr_work);
6653:       PetscFree(gqr_work);
6654:       PetscFree(qr_basis);
6655:     }
6656:     PetscBTDestroy(&is_primal);
6657:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6658:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6660:     /* assembling of global change of variable */
6661:     if (!pcbddc->fake_change) {
6662:       Mat      tmat;
6663:       PetscInt bs;

6665:       VecGetSize(pcis->vec1_global,&global_size);
6666:       VecGetLocalSize(pcis->vec1_global,&local_size);
6667:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6668:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6669:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6670:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6671:       MatGetBlockSize(pc->pmat,&bs);
6672:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6673:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6674:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6675:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6676:       MatDestroy(&tmat);
6677:       VecSet(pcis->vec1_global,0.0);
6678:       VecSet(pcis->vec1_N,1.0);
6679:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6680:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6681:       VecReciprocal(pcis->vec1_global);
6682:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6684:       /* check */
6685:       if (pcbddc->dbg_flag) {
6686:         PetscReal error;
6687:         Vec       x,x_change;

6689:         VecDuplicate(pcis->vec1_global,&x);
6690:         VecDuplicate(pcis->vec1_global,&x_change);
6691:         VecSetRandom(x,NULL);
6692:         VecCopy(x,pcis->vec1_global);
6693:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6694:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6695:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6696:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6697:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6698:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6699:         VecAXPY(x,-1.0,x_change);
6700:         VecNorm(x,NORM_INFINITY,&error);
6701:         if (error > PETSC_SMALL) {
6702:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6703:         }
6704:         VecDestroy(&x);
6705:         VecDestroy(&x_change);
6706:       }
6707:       /* adapt sub_schurs computed (if any) */
6708:       if (pcbddc->use_deluxe_scaling) {
6709:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6711:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6712:         if (sub_schurs && sub_schurs->S_Ej_all) {
6713:           Mat                    S_new,tmat;
6714:           IS                     is_all_N,is_V_Sall = NULL;

6716:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6717:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6718:           if (pcbddc->deluxe_zerorows) {
6719:             ISLocalToGlobalMapping NtoSall;
6720:             IS                     is_V;
6721:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6722:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6723:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6724:             ISLocalToGlobalMappingDestroy(&NtoSall);
6725:             ISDestroy(&is_V);
6726:           }
6727:           ISDestroy(&is_all_N);
6728:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6729:           MatDestroy(&sub_schurs->S_Ej_all);
6730:           PetscObjectReference((PetscObject)S_new);
6731:           if (pcbddc->deluxe_zerorows) {
6732:             const PetscScalar *array;
6733:             const PetscInt    *idxs_V,*idxs_all;
6734:             PetscInt          i,n_V;

6736:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6737:             ISGetLocalSize(is_V_Sall,&n_V);
6738:             ISGetIndices(is_V_Sall,&idxs_V);
6739:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6740:             VecGetArrayRead(pcis->D,&array);
6741:             for (i=0;i<n_V;i++) {
6742:               PetscScalar val;
6743:               PetscInt    idx;

6745:               idx = idxs_V[i];
6746:               val = array[idxs_all[idxs_V[i]]];
6747:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6748:             }
6749:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6750:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6751:             VecRestoreArrayRead(pcis->D,&array);
6752:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6753:             ISRestoreIndices(is_V_Sall,&idxs_V);
6754:           }
6755:           sub_schurs->S_Ej_all = S_new;
6756:           MatDestroy(&S_new);
6757:           if (sub_schurs->sum_S_Ej_all) {
6758:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6759:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6760:             PetscObjectReference((PetscObject)S_new);
6761:             if (pcbddc->deluxe_zerorows) {
6762:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6763:             }
6764:             sub_schurs->sum_S_Ej_all = S_new;
6765:             MatDestroy(&S_new);
6766:           }
6767:           ISDestroy(&is_V_Sall);
6768:           MatDestroy(&tmat);
6769:         }
6770:         /* destroy any change of basis context in sub_schurs */
6771:         if (sub_schurs && sub_schurs->change) {
6772:           PetscInt i;

6774:           for (i=0;i<sub_schurs->n_subs;i++) {
6775:             KSPDestroy(&sub_schurs->change[i]);
6776:           }
6777:           PetscFree(sub_schurs->change);
6778:         }
6779:       }
6780:       if (pcbddc->switch_static) { /* need to save the local change */
6781:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6782:       } else {
6783:         MatDestroy(&localChangeOfBasisMatrix);
6784:       }
6785:       /* determine if any process has changed the pressures locally */
6786:       pcbddc->change_interior = pcbddc->benign_have_null;
6787:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6788:       MatDestroy(&pcbddc->ConstraintMatrix);
6789:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6790:       pcbddc->use_qr_single = qr_needed;
6791:     }
6792:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6793:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6794:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6795:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6796:     } else {
6797:       Mat benign_global = NULL;
6798:       if (pcbddc->benign_have_null) {
6799:         Mat tmat;

6801:         pcbddc->change_interior = PETSC_TRUE;
6802:         VecSet(pcis->vec1_global,0.0);
6803:         VecSet(pcis->vec1_N,1.0);
6804:         VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6805:         VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6806:         VecReciprocal(pcis->vec1_global);
6807:         VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6808:         VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6809:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6810:         if (pcbddc->benign_change) {
6811:           Mat M;

6813:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6814:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6815:           MatISSetLocalMat(tmat,M);
6816:           MatDestroy(&M);
6817:         } else {
6818:           Mat         eye;
6819:           PetscScalar *array;

6821:           VecGetArray(pcis->vec1_N,&array);
6822:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6823:           for (i=0;i<pcis->n;i++) {
6824:             MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6825:           }
6826:           VecRestoreArray(pcis->vec1_N,&array);
6827:           MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6828:           MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6829:           MatISSetLocalMat(tmat,eye);
6830:           MatDestroy(&eye);
6831:         }
6832:         MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6833:         MatDestroy(&tmat);
6834:       }
6835:       if (pcbddc->user_ChangeOfBasisMatrix) {
6836:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6837:         MatDestroy(&benign_global);
6838:       } else if (pcbddc->benign_have_null) {
6839:         pcbddc->ChangeOfBasisMatrix = benign_global;
6840:       }
6841:     }
6842:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6843:       IS             is_global;
6844:       const PetscInt *gidxs;

6846:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6847:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6848:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6849:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6850:       ISDestroy(&is_global);
6851:     }
6852:   }
6853:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6854:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6855:   }

6857:   if (!pcbddc->fake_change) {
6858:     /* add pressure dofs to set of primal nodes for numbering purposes */
6859:     for (i=0;i<pcbddc->benign_n;i++) {
6860:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6861:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6862:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6863:       pcbddc->local_primal_size_cc++;
6864:       pcbddc->local_primal_size++;
6865:     }

6867:     /* check if a new primal space has been introduced (also take into account benign trick) */
6868:     pcbddc->new_primal_space_local = PETSC_TRUE;
6869:     if (olocal_primal_size == pcbddc->local_primal_size) {
6870:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6871:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6872:       if (!pcbddc->new_primal_space_local) {
6873:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6874:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6875:       }
6876:     }
6877:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6878:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6879:   }
6880:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

6882:   /* flush dbg viewer */
6883:   if (pcbddc->dbg_flag) {
6884:     PetscViewerFlush(pcbddc->dbg_viewer);
6885:   }

6887:   /* free workspace */
6888:   PetscBTDestroy(&qr_needed_idx);
6889:   PetscBTDestroy(&change_basis);
6890:   if (!pcbddc->adaptive_selection) {
6891:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6892:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6893:   } else {
6894:     PetscFree5(pcbddc->adaptive_constraints_n,
6895:                       pcbddc->adaptive_constraints_idxs_ptr,
6896:                       pcbddc->adaptive_constraints_data_ptr,
6897:                       pcbddc->adaptive_constraints_idxs,
6898:                       pcbddc->adaptive_constraints_data);
6899:     PetscFree(constraints_n);
6900:     PetscFree(constraints_idxs_B);
6901:   }
6902:   return(0);
6903: }
6904: /* #undef PETSC_MISSING_LAPACK_GESVD */

/*
   PCBDDCAnalyzeInterface - (re)builds the graph held in pcbddc->mat_graph and runs its
   connected-components analysis.

   When pcbddc->recompute_topography is set, the graph is reset and re-initialized from the
   local-to-global mapping of pc->pmat; a CSR adjacency is taken from matis->A if none is
   attached and pcbddc->use_local_adj is set; coordinates stored in the graph are moved through
   matis->sf when mat_graph->cloc is false; and PCBDDCGraphSetUp is called with
   NeumannBoundariesLocal, DirichletBoundariesLocal, ISForDofsLocal and
   user_primal_vertices_local. Independently of that, PCBDDCGraphComputeConnectedComponents is
   run whenever pcbddc->graphanalyzed is false.

   Input parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Returns 0 on the normal path; size mismatches are reported through SETERRQ2.

   NOTE(review): ierr is declared but never used in this listing and no call is checked;
   presumably the error-checking wrappers were dropped when the listing was generated - confirm
   against the original file.
*/
6906: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6907: {
6908:   ISLocalToGlobalMapping map;
6909:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
6910:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
6911:   PetscInt               i,N;
        /* rcsr records that this call went through the default-adjacency branch below */
6912:   PetscBool              rcsr = PETSC_FALSE;
6913:   PetscErrorCode         ierr;

6916:   if (pcbddc->recompute_topography) {
          /* force the connected-components pass at the bottom of this function */
6917:     pcbddc->graphanalyzed = PETSC_FALSE;
6918:     /* Reset previously computed graph */
6919:     PCBDDCGraphReset(pcbddc->mat_graph);
6920:     /* Init local Graph struct */
          /* N is the first size returned by MatGetSize on pc->pmat; it is also reused below to
             size the local_subs array */
6921:     MatGetSize(pc->pmat,&N,NULL);
6922:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6923:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* only the local primal-vertices IS is checked, and only when no
             pcbddc->user_primal_vertices is set; the check is invoked with MPI_LOR */
6925:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6926:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6927:     }
6928:     /* Check validity of the csr graph passed in by the user */
          /* a zero nvtxs_csr is accepted; any other value must equal mat_graph->nvtxs */
6929:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

6931:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6932:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6933:       PetscInt  *xadj,*adjncy;
6934:       PetscInt  nvtxs;
6935:       PetscBool flg_row=PETSC_FALSE;

            /* the row structure of matis->A is borrowed between MatGetRowIJ and MatRestoreRowIJ;
               it is handed to the graph with PETSC_COPY_VALUES only when flg_row comes back true */
6937:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6938:       if (flg_row) {
6939:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6940:         pcbddc->computed_rowadj = PETSC_TRUE;
6941:       }
6942:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* note: set whether or not flg_row was true */
6943:       rcsr = PETSC_TRUE;
6944:     }
6945:     if (pcbddc->dbg_flag) {
6946:       PetscViewerFlush(pcbddc->dbg_viewer);
6947:     }

          /* coordinates are attached (cdim != 0) but cloc is still false: replace
             mat_graph->coords with an array obtained through matis->sf
             (presumably cloc means "coordinates already in subdomain-local layout" - confirm) */
6949:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6950:       PetscReal    *lcoords;
6951:       PetscInt     n;
6952:       MPI_Datatype dimrealtype;

6954:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
6955:       MatGetLocalSize(matis->A,&n,NULL);
6956:       MatISSetUpSF(pc->pmat);
            /* destination buffer: cdim reals for each of the n local rows of matis->A */
6957:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
            /* one MPI unit = cdim contiguous reals, so each SF entry moves a whole coordinate tuple */
6958:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
6959:       MPI_Type_commit(&dimrealtype);
6960:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6961:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6962:       MPI_Type_free(&dimrealtype);
            /* the old array is released and the graph takes over lcoords */
6963:       PetscFree(pcbddc->mat_graph->coords);

6965:       pcbddc->mat_graph->coords = lcoords;
6966:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6967:       pcbddc->mat_graph->cnloc  = n;
6968:     }
          /* a zero cnloc is accepted; any other value must equal mat_graph->nvtxs */
6969:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
          /* active only while corner selection is requested and not yet marked as done */
6970:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);

6972:     /* Setup of Graph */
6973:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6974:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

6976:     /* attach info on disconnected subdomains if present */
6977:     if (pcbddc->n_local_subs) {
6978:       PetscInt *local_subs;

            /* NOTE(review): the array has N entries and only the positions listed in the
               pcbddc->local_subs index sets are written below; presumably those sets cover
               every index the graph later reads - confirm */
6980:       PetscMalloc1(N,&local_subs);
6981:       for (i=0;i<pcbddc->n_local_subs;i++) {
6982:         const PetscInt *idxs;
6983:         PetscInt       nl,j;

6985:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
6986:         ISGetIndices(pcbddc->local_subs[i],&idxs);
              /* tag every index of the i-th set with i */
6987:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6988:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6989:       }
            /* the array is stored in the graph and is not freed in this function */
6990:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6991:       pcbddc->mat_graph->local_subs = local_subs;
6992:     }
6993:   }

        /* runs after a topography rebuild, and also whenever graphanalyzed is false on entry */
6995:   if (!pcbddc->graphanalyzed) {
6996:     /* Graph's connected components analysis */
6997:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6998:     pcbddc->graphanalyzed = PETSC_TRUE;
6999:   }
        /* clear the recorded CSR size when this call attempted the default adjacency
           (presumably so the size check above cannot trip on a later call - confirm) */
7000:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7001:   return(0);
7002: }

/*
   PCBDDCOrthonormalizeVecs - normalizes vecs[0], then for each following vector adds to it a
   linear combination of all the previous ones (coefficients derived from VecMDot) and
   normalizes the result. The vectors are modified in place.

   Input parameters:
+  n    - number of vectors; when n is 0 the function returns immediately
-  vecs - array of n vectors, overwritten by the result

   Returns 0.

   NOTE(review): no check is made here on the norms returned by VecNormalize (NULL is passed),
   so linearly dependent inputs are not detected in this function.
*/
7004: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7005: {
7006:   PetscInt       i,j;
        /* scratch coefficients; only the first i entries are used at step i */
7007:   PetscScalar    *alphas;

7011:   if (!n) return(0);
7012:   PetscMalloc1(n,&alphas);
7013:   VecNormalize(vecs[0],NULL);
7014:   for (i=1;i<n;i++) {
          /* all i dot products are taken against the not-yet-updated vecs[i] */
7015:     VecMDot(vecs[i],i,vecs,alphas);
          /* negate (and conjugate) so that the VecMAXPY below subtracts the components.
             NOTE(review): VecMDot is documented as returning y[j]^H x; if so the projection
             coefficient would be plain -alphas[j], and the extra PetscConj only makes a
             difference for complex scalars - confirm it is intended */
7016:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7017:     VecMAXPY(vecs[i],i,alphas,vecs);
7018:     VecNormalize(vecs[i],NULL);
7019:   }
7020:   PetscFree(alphas);
7021:   return(0);
7022: }

7024: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7025: {
7026:   Mat            A;
7027:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
7028:   PetscMPIInt    size,rank,color;
7029:   PetscInt       *xadj,*adjncy;
7030:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7031:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
7032:   PetscInt       void_procs,*procs_candidates = NULL;
7033:   PetscInt       xadj_count,*count;
7034:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
7035:   PetscSubcomm   psubcomm;
7036:   MPI_Comm       subcomm;

7041:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7042:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7045:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains);

7047:   if (have_void) *have_void = PETSC_FALSE;
7048:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7049:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7050:   MatISGetLocalMat(mat,&A);
7051:   MatGetLocalSize(A,&n,NULL);
7052:   im_active = !!n;
7053:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7054:   void_procs = size - active_procs;
7055:   /* get ranks of non-active processes in mat communicator */
7056:   if (void_procs) {
7057:     PetscInt ncand;

7059:     if (have_void) *have_void = PETSC_TRUE;
7060:     PetscMalloc1(size,&procs_candidates);
7061:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7062:     for (i=0,ncand=0;i<size;i++) {
7063:       if (!procs_candidates[i]) {
7064:         procs_candidates[ncand++] = i;
7065:       }
7066:     }
7067:     /* force n_subdomains to be not greater than the number of non-active processes */
7068:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7069:   }

7071:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7072:      number of subdomains requested 1 -> send to master or first candidate in voids  */
7073:   MatGetSize(mat,&N,NULL);
7074:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7075:     PetscInt issize,isidx,dest;
7076:     if (*n_subdomains == 1) dest = 0;
7077:     else dest = rank;
7078:     if (im_active) {
7079:       issize = 1;
7080:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7081:         isidx = procs_candidates[dest];
7082:       } else {
7083:         isidx = dest;
7084:       }
7085:     } else {
7086:       issize = 0;
7087:       isidx = -1;
7088:     }
7089:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7090:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7091:     PetscFree(procs_candidates);
7092:     return(0);
7093:   }
7094:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7095:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7096:   threshold = PetscMax(threshold,2);

7098:   /* Get info on mapping */
7099:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7101:   /* build local CSR graph of subdomains' connectivity */
7102:   PetscMalloc1(2,&xadj);
7103:   xadj[0] = 0;
7104:   xadj[1] = PetscMax(n_neighs-1,0);
7105:   PetscMalloc1(xadj[1],&adjncy);
7106:   PetscMalloc1(xadj[1],&adjncy_wgt);
7107:   PetscCalloc1(n,&count);
7108:   for (i=1;i<n_neighs;i++)
7109:     for (j=0;j<n_shared[i];j++)
7110:       count[shared[i][j]] += 1;

7112:   xadj_count = 0;
7113:   for (i=1;i<n_neighs;i++) {
7114:     for (j=0;j<n_shared[i];j++) {
7115:       if (count[shared[i][j]] < threshold) {
7116:         adjncy[xadj_count] = neighs[i];
7117:         adjncy_wgt[xadj_count] = n_shared[i];
7118:         xadj_count++;
7119:         break;
7120:       }
7121:     }
7122:   }
7123:   xadj[1] = xadj_count;
7124:   PetscFree(count);
7125:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7126:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

7128:   PetscMalloc1(1,&ranks_send_to_idx);

7130:   /* Restrict work on active processes only */
7131:   PetscMPIIntCast(im_active,&color);
7132:   if (void_procs) {
7133:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7134:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7135:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7136:     subcomm = PetscSubcommChild(psubcomm);
7137:   } else {
7138:     psubcomm = NULL;
7139:     subcomm = PetscObjectComm((PetscObject)mat);
7140:   }

7142:   v_wgt = NULL;
7143:   if (!color) {
7144:     PetscFree(xadj);
7145:     PetscFree(adjncy);
7146:     PetscFree(adjncy_wgt);
7147:   } else {
7148:     Mat             subdomain_adj;
7149:     IS              new_ranks,new_ranks_contig;
7150:     MatPartitioning partitioner;
7151:     PetscInt        rstart=0,rend=0;
7152:     PetscInt        *is_indices,*oldranks;
7153:     PetscMPIInt     size;
7154:     PetscBool       aggregate;

7156:     MPI_Comm_size(subcomm,&size);
7157:     if (void_procs) {
7158:       PetscInt prank = rank;
7159:       PetscMalloc1(size,&oldranks);
7160:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7161:       for (i=0;i<xadj[1];i++) {
7162:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7163:       }
7164:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7165:     } else {
7166:       oldranks = NULL;
7167:     }
7168:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7169:     if (aggregate) { /* TODO: all this part could be made more efficient */
7170:       PetscInt    lrows,row,ncols,*cols;
7171:       PetscMPIInt nrank;
7172:       PetscScalar *vals;

7174:       MPI_Comm_rank(subcomm,&nrank);
7175:       lrows = 0;
7176:       if (nrank<redprocs) {
7177:         lrows = size/redprocs;
7178:         if (nrank<size%redprocs) lrows++;
7179:       }
7180:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7181:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7182:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7183:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7184:       row = nrank;
7185:       ncols = xadj[1]-xadj[0];
7186:       cols = adjncy;
7187:       PetscMalloc1(ncols,&vals);
7188:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7189:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7190:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7191:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7192:       PetscFree(xadj);
7193:       PetscFree(adjncy);
7194:       PetscFree(adjncy_wgt);
7195:       PetscFree(vals);
7196:       if (use_vwgt) {
7197:         Vec               v;
7198:         const PetscScalar *array;
7199:         PetscInt          nl;

7201:         MatCreateVecs(subdomain_adj,&v,NULL);
7202:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7203:         VecAssemblyBegin(v);
7204:         VecAssemblyEnd(v);
7205:         VecGetLocalSize(v,&nl);
7206:         VecGetArrayRead(v,&array);
7207:         PetscMalloc1(nl,&v_wgt);
7208:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7209:         VecRestoreArrayRead(v,&array);
7210:         VecDestroy(&v);
7211:       }
7212:     } else {
7213:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7214:       if (use_vwgt) {
7215:         PetscMalloc1(1,&v_wgt);
7216:         v_wgt[0] = n;
7217:       }
7218:     }
7219:     /* MatView(subdomain_adj,0); */

7221:     /* Partition */
7222:     MatPartitioningCreate(subcomm,&partitioner);
7223:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7224:     if (v_wgt) {
7225:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7226:     }
7227:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7228:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7229:     MatPartitioningSetFromOptions(partitioner);
7230:     MatPartitioningApply(partitioner,&new_ranks);
7231:     /* MatPartitioningView(partitioner,0); */

7233:     /* renumber new_ranks to avoid "holes" in new set of processors */
7234:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7235:     ISDestroy(&new_ranks);
7236:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7237:     if (!aggregate) {
7238:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7239: #if defined(PETSC_USE_DEBUG)
7240:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7241: #endif
7242:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7243:       } else if (oldranks) {
7244:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7245:       } else {
7246:         ranks_send_to_idx[0] = is_indices[0];
7247:       }
7248:     } else {
7249:       PetscInt    idx = 0;
7250:       PetscMPIInt tag;
7251:       MPI_Request *reqs;

7253:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7254:       PetscMalloc1(rend-rstart,&reqs);
7255:       for (i=rstart;i<rend;i++) {
7256:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7257:       }
7258:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7259:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7260:       PetscFree(reqs);
7261:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7262: #if defined(PETSC_USE_DEBUG)
7263:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7264: #endif
7265:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7266:       } else if (oldranks) {
7267:         ranks_send_to_idx[0] = oldranks[idx];
7268:       } else {
7269:         ranks_send_to_idx[0] = idx;
7270:       }
7271:     }
7272:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7273:     /* clean up */
7274:     PetscFree(oldranks);
7275:     ISDestroy(&new_ranks_contig);
7276:     MatDestroy(&subdomain_adj);
7277:     MatPartitioningDestroy(&partitioner);
7278:   }
7279:   PetscSubcommDestroy(&psubcomm);
7280:   PetscFree(procs_candidates);

7282:   /* assemble parallel IS for sends */
7283:   i = 1;
7284:   if (!color) i=0;
7285:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7286:   return(0);
7287: }

/* Private tags for the storage type of a local matrix; PCBDDCMatISSubassemble
   stores one of them as the first entry of the index message it sends. */
typedef enum {
  MATDENSE_PRIVATE = 0, /* sequential dense */
  MATAIJ_PRIVATE   = 1, /* sequential AIJ */
  MATBAIJ_PRIVATE  = 2, /* sequential BAIJ */
  MATSBAIJ_PRIVATE = 3  /* sequential SBAIJ */
} MatTypePrivate;

/*
   PCBDDCMatISSubassemble - sends the local matrix of a MATIS matrix to the ranks listed in is_sends and
   adds everything a process receives into the local matrix of the MATIS matrix returned in mat_n.

   Parameters, as used by the code below:
     mat             - MATIS matrix; its local matrix must be MATSEQDENSE and square, otherwise an error is raised
     is_sends        - IS whose local entries are the destination ranks (in the communicator of mat) of this process;
                       if NULL, the pattern is computed by PCBDDCMatISGetSubassemblingPattern() from n_subdomains
     n_subdomains    - only read when is_sends is NULL, in which case it must be nonzero
     restrict_comm   - if true and reuse is false, mat_n is created on a child communicator obtained from a PetscSubcomm;
                       processes flagged as excluded get *mat_n destroyed and set to NULL before returning
     restrict_full   - with restrict_comm: if true, every process that receives nothing is excluded;
                       if false, only processes that send without receiving are excluded
     reuse           - if true and *mat_n is not NULL, *mat_n is reused after the size checks below and gets a new
                       local-to-global mapping; if true and *mat_n is NULL, a MATIS on PETSC_COMM_SELF is created
     mat_n           - the subassembled matrix (also an input when reuse is true)
     nis, isarray    - optional index sets: each isarray[j] is destroyed and replaced by an IS built from the
                       indices received for slot j after PetscSortRemoveDupsInt()
     nvecs, nnsp_vec - at most one vector is supported: nnsp_vec[0] is destroyed and replaced by a vector of local
                       size new_local_rows into which the received entries are accumulated

   Message layout used between processes:
     indices - [private matrix type, n, the n global indices of the row mapping of mat]
     values  - the raw array of the dense local matrix (n*n entries)
     IS      - for each IS in isarray: [length, indices]
     vector  - the raw array of nnsp_vec[0] (n entries)
*/
7291: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7292: {
7293:   Mat                    local_mat;
7294:   IS                     is_sends_internal;
7295:   PetscInt               rows,cols,new_local_rows;
7296:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7297:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7298:   ISLocalToGlobalMapping l2gmap;
7299:   PetscInt*              l2gmap_indices;
7300:   const PetscInt*        is_indices;
7301:   MatType                new_local_type;
7302:   /* buffers */
7303:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7304:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7305:   PetscInt               *recv_buffer_idxs_local;
7306:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7307:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7308:   /* MPI */
7309:   MPI_Comm               comm,comm_n;
7310:   PetscSubcomm           subcomm;
7311:   PetscMPIInt            n_sends,n_recvs,commsize;
7312:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7313:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7314:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7315:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7316:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7317:   PetscErrorCode         ierr;

7321:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7322:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7329:   if (nvecs) {
7330:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7332:   }
7333:   /* further checks */
7334:   MatISGetLocalMat(mat,&local_mat);
7335:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7336:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7337:   MatGetSize(local_mat,&rows,&cols);
7338:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
      /* when reusing an existing matrix, it must be a MATIS with the same global sizes as mat */
7339:   if (reuse && *mat_n) {
7340:     PetscInt mrows,mcols,mnrows,mncols;
7342:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7343:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7344:     MatGetSize(mat,&mrows,&mcols);
7345:     MatGetSize(*mat_n,&mnrows,&mncols);
7346:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7347:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7348:   }
7349:   MatGetBlockSize(local_mat,&bs);

7352:   /* prepare IS for sending if not provided */
7353:   if (!is_sends) {
7354:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7355:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7356:   } else {
        /* take a reference so that is_sends_internal can be destroyed unconditionally later */
7357:     PetscObjectReference((PetscObject)is_sends);
7358:     is_sends_internal = is_sends;
7359:   }

7361:   /* get comm */
7362:   PetscObjectGetComm((PetscObject)mat,&comm);

7364:   /* compute number of sends */
7365:   ISGetLocalSize(is_sends_internal,&i);
7366:   PetscMPIIntCast(i,&n_sends);

7368:   /* compute number of receives */
7369:   MPI_Comm_size(comm,&commsize);
7370:   PetscMalloc1(commsize,&iflags);
7371:   PetscMemzero(iflags,commsize*sizeof(*iflags));
7372:   ISGetIndices(is_sends_internal,&is_indices);
7373:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7374:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7375:   PetscFree(iflags);

7377:   /* restrict comm if requested */
7378:   subcomm = 0;
7379:   destroy_mat = PETSC_FALSE;
7380:   if (restrict_comm) {
7381:     PetscMPIInt color,subcommsize;

        /* color == 1 marks the processes excluded from the new communicator */
7383:     color = 0;
7384:     if (restrict_full) {
7385:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7386:     } else {
7387:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7388:     }
        /* the sum of the colors is the number of excluded processes */
7389:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7390:     subcommsize = commsize - subcommsize;
7391:     /* check if reuse has been requested */
7392:     if (reuse) {
7393:       if (*mat_n) {
7394:         PetscMPIInt subcommsize2;
7395:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7396:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7397:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7398:       } else {
7399:         comm_n = PETSC_COMM_SELF;
7400:       }
7401:     } else { /* MAT_INITIAL_MATRIX */
7402:       PetscMPIInt rank;

7404:       MPI_Comm_rank(comm,&rank);
7405:       PetscSubcommCreate(comm,&subcomm);
7406:       PetscSubcommSetNumber(subcomm,2);
7407:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7408:       comm_n = PetscSubcommChild(subcomm);
7409:     }
7410:     /* flag to destroy *mat_n if not significant */
7411:     if (color) destroy_mat = PETSC_TRUE;
7412:   } else {
7413:     comm_n = comm;
7414:   }

7416:   /* prepare send/receive buffers */
      /* ilengths_*[r] is the length of the message this process sends to rank r (zero if nothing is sent) */
7417:   PetscMalloc1(commsize,&ilengths_idxs);
7418:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7419:   PetscMalloc1(commsize,&ilengths_vals);
7420:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7421:   if (nis) {
7422:     PetscCalloc1(commsize,&ilengths_idxs_is);
7423:   }

7425:   /* Get data from local matrices */
      /* NOTE(review): isdense was already required to be true by the check above and is not modified in between,
         so this error cannot fire; it looks like a placeholder for the non-dense case described in the TODO */
7426:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7427:     /* TODO: See below some guidelines on how to prepare the local buffers */
7428:     /*
7429:        send_buffer_vals should contain the raw values of the local matrix
7430:        send_buffer_idxs should contain:
7431:        - MatType_PRIVATE type
7432:        - PetscInt        size_of_l2gmap
7433:        - PetscInt        global_row_indices[size_of_l2gmap]
7434:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7435:     */
7436:   else {
        /* the dense array is sent in place; it is restored only after the value sends have completed */
7437:     MatDenseGetArray(local_mat,&send_buffer_vals);
7438:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7439:     PetscMalloc1(i+2,&send_buffer_idxs);
7440:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7441:     send_buffer_idxs[1] = i;
7442:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7443:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7444:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7445:     PetscMPIIntCast(i,&len);
        /* the same index and value messages are sent to every destination */
7446:     for (i=0;i<n_sends;i++) {
7447:       ilengths_vals[is_indices[i]] = len*len;
7448:       ilengths_idxs[is_indices[i]] = len+2;
7449:     }
7450:   }
7451:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7452:   /* additional is (if any) */
7453:   if (nis) {
7454:     PetscMPIInt psum;
7455:     PetscInt j;
        /* first pass: total length of the packed IS message */
7456:     for (j=0,psum=0;j<nis;j++) {
7457:       PetscInt plen;
7458:       ISGetLocalSize(isarray[j],&plen);
7459:       PetscMPIIntCast(plen,&len);
7460:       psum += len+1; /* indices + length */
7461:     }
7462:     PetscMalloc1(psum,&send_buffer_idxs_is);
        /* second pass: pack each IS as its length followed by its indices */
7463:     for (j=0,psum=0;j<nis;j++) {
7464:       PetscInt plen;
7465:       const PetscInt *is_array_idxs;
7466:       ISGetLocalSize(isarray[j],&plen);
7467:       send_buffer_idxs_is[psum] = plen;
7468:       ISGetIndices(isarray[j],&is_array_idxs);
7469:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7470:       ISRestoreIndices(isarray[j],&is_array_idxs);
7471:       psum += plen+1; /* indices + length */
7472:     }
7473:     for (i=0;i<n_sends;i++) {
7474:       ilengths_idxs_is[is_indices[i]] = psum;
7475:     }
7476:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7477:   }
7478:   MatISRestoreLocalMat(mat,&local_mat);

      /* size the receive buffers from the incoming message lengths */
7480:   buf_size_idxs = 0;
7481:   buf_size_vals = 0;
7482:   buf_size_idxs_is = 0;
7483:   buf_size_vecs = 0;
7484:   for (i=0;i<n_recvs;i++) {
7485:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7486:     buf_size_vals += (PetscInt)olengths_vals[i];
7487:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7488:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; /* two more entries per message than the olengths_idxs[i]-2 actually received below */
7489:   }
7490:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7491:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7492:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7493:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7495:   /* get new tags for clean communications */
7496:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7497:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7498:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7499:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7501:   /* allocate for requests */
7502:   PetscMalloc1(n_sends,&send_req_idxs);
7503:   PetscMalloc1(n_sends,&send_req_vals);
7504:   PetscMalloc1(n_sends,&send_req_idxs_is);
7505:   PetscMalloc1(n_sends,&send_req_vecs);
7506:   PetscMalloc1(n_recvs,&recv_req_idxs);
7507:   PetscMalloc1(n_recvs,&recv_req_vals);
7508:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7509:   PetscMalloc1(n_recvs,&recv_req_vecs);

7511:   /* communications */
      /* post all receives first; the messages of the i-th sender are stored back to back in each buffer */
7512:   ptr_idxs = recv_buffer_idxs;
7513:   ptr_vals = recv_buffer_vals;
7514:   ptr_idxs_is = recv_buffer_idxs_is;
7515:   ptr_vecs = recv_buffer_vecs;
7516:   for (i=0;i<n_recvs;i++) {
7517:     source_dest = onodes[i];
7518:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7519:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7520:     ptr_idxs += olengths_idxs[i];
7521:     ptr_vals += olengths_vals[i];
7522:     if (nis) {
7523:       source_dest = onodes_is[i];
7524:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7525:       ptr_idxs_is += olengths_idxs_is[i];
7526:     }
7527:     if (nvecs) {
          /* the vector message has the length of the index message minus its two header entries */
7528:       source_dest = onodes[i];
7529:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7530:       ptr_vecs += olengths_idxs[i]-2;
7531:     }
7532:   }
7533:   for (i=0;i<n_sends;i++) {
7534:     PetscMPIIntCast(is_indices[i],&source_dest);
7535:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7536:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7537:     if (nis) {
7538:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7539:     }
7540:     if (nvecs) {
          /* NOTE(review): the array is acquired once per send but restored only once after the waits below,
             so the calls are balanced only when n_sends == 1 - confirm callers never send to more than one rank
             (and that a process with n_sends == 0 may call the restore without a matching get) */
7541:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7542:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7543:     }
7544:   }
7545:   ISRestoreIndices(is_sends_internal,&is_indices);
7546:   ISDestroy(&is_sends_internal);

7548:   /* assemble new l2g map */
7549:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
      /* first pass: count the received global indices (duplicates included) */
7550:   ptr_idxs = recv_buffer_idxs;
7551:   new_local_rows = 0;
7552:   for (i=0;i<n_recvs;i++) {
7553:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7554:     ptr_idxs += olengths_idxs[i];
7555:   }
7556:   PetscMalloc1(new_local_rows,&l2gmap_indices);
      /* second pass: concatenate the received global indices */
7557:   ptr_idxs = recv_buffer_idxs;
7558:   new_local_rows = 0;
7559:   for (i=0;i<n_recvs;i++) {
7560:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7561:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7562:     ptr_idxs += olengths_idxs[i];
7563:   }
      /* new_local_rows is updated in place by the call below and is the local size used from here on */
7564:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7565:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7566:   PetscFree(l2gmap_indices);

7568:   /* infer new local matrix type from received local matrices type */
7569:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7570:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7571:   if (n_recvs) {
        /* start from the type tag of this process and fall back to AIJ as soon as a received tag differs */
7572:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7573:     ptr_idxs = recv_buffer_idxs;
7574:     for (i=0;i<n_recvs;i++) {
7575:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7576:         new_local_type_private = MATAIJ_PRIVATE;
7577:         break;
7578:       }
7579:       ptr_idxs += olengths_idxs[i];
7580:     }
7581:     switch (new_local_type_private) {
7582:       case MATDENSE_PRIVATE:
7583:         new_local_type = MATSEQAIJ;
7584:         bs = 1;
7585:         break;
7586:       case MATAIJ_PRIVATE:
7587:         new_local_type = MATSEQAIJ;
7588:         bs = 1;
7589:         break;
7590:       case MATBAIJ_PRIVATE:
7591:         new_local_type = MATSEQBAIJ;
7592:         break;
7593:       case MATSBAIJ_PRIVATE:
7594:         new_local_type = MATSEQSBAIJ;
7595:         break;
7596:       default:
7597:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7598:         break;
7599:     }
7600:   } else { /* by default, new_local_type is seqaij */
7601:     new_local_type = MATSEQAIJ;
7602:     bs = 1;
7603:   }

7605:   /* create MATIS object if needed */
7606:   if (!reuse) {
7607:     MatGetSize(mat,&rows,&cols);
7608:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7609:   } else {
7610:     /* it also destroys the local matrices */
7611:     if (*mat_n) {
7612:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7613:     } else { /* this is a fake object */
          /* NOTE(review): on this path rows and cols still hold the size of the local matrix of mat read at the
             top of the function, not the global size of mat - confirm this is intended for the fake object */
7614:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7615:     }
7616:   }
7617:   MatISGetLocalMat(*mat_n,&local_mat);
7618:   MatSetType(local_mat,new_local_type);

7620:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7622:   /* Global to local map of received indices */
7623:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7624:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7625:   ISLocalToGlobalMappingDestroy(&l2gmap);

7627:   /* restore attributes -> type of incoming data and its size */
      /* the mapping above was applied to the whole buffer, including the two header entries of each message,
         so those are copied back unchanged from the received buffer */
7628:   buf_size_idxs = 0;
7629:   for (i=0;i<n_recvs;i++) {
7630:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7631:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7632:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7633:   }
7634:   PetscFree(recv_buffer_idxs);

7636:   /* set preallocation */
7637:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7638:   if (!newisdense) {
7639:     PetscInt *new_local_nnz=0;

7641:     ptr_idxs = recv_buffer_idxs_local;
7642:     if (n_recvs) {
7643:       PetscCalloc1(new_local_rows,&new_local_nnz);
7644:     }
7645:     for (i=0;i<n_recvs;i++) {
7646:       PetscInt j;
7647:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
            /* each received dense block of size n adds n potential nonzeros to every local row it touches */
7648:         for (j=0;j<*(ptr_idxs+1);j++) {
7649:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7650:         }
7651:       } else {
7652:         /* TODO */
7653:       }
7654:       ptr_idxs += olengths_idxs[i];
7655:     }
7656:     if (new_local_nnz) {
          /* a row cannot hold more than new_local_rows entries; the counts are then rescaled for each format.
             NOTE(review): all three preallocation routines are called in sequence; presumably only the one
             matching the actual type of local_mat takes effect - confirm against the PETSc manual pages */
7657:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7658:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7659:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7660:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7661:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7662:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7663:     } else {
7664:       MatSetUp(local_mat);
7665:     }
7666:     PetscFree(new_local_nnz);
7667:   } else {
7668:     MatSetUp(local_mat);
7669:   }

7671:   /* set values */
7672:   ptr_vals = recv_buffer_vals;
7673:   ptr_idxs = recv_buffer_idxs_local;
7674:   for (i=0;i<n_recvs;i++) {
7675:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
          /* the received dense array is added with MAT_ROW_ORIENTED switched off, then the option is switched back on */
7676:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7677:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7678:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7679:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7680:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7681:     } else {
7682:       /* TODO */
7683:     }
7684:     ptr_idxs += olengths_idxs[i];
7685:     ptr_vals += olengths_vals[i];
7686:   }
7687:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7688:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7689:   MatISRestoreLocalMat(*mat_n,&local_mat);
7690:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7691:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7692:   PetscFree(recv_buffer_vals);

      /* disabled consistency check: compares the action of mat and *mat_n on a random vector */
7694: #if 0
7695:   if (!restrict_comm) { /* check */
7696:     Vec       lvec,rvec;
7697:     PetscReal infty_error;

7699:     MatCreateVecs(mat,&rvec,&lvec);
7700:     VecSetRandom(rvec,NULL);
7701:     MatMult(mat,rvec,lvec);
7702:     VecScale(lvec,-1.0);
7703:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7704:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7705:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7706:     VecDestroy(&rvec);
7707:     VecDestroy(&lvec);
7708:   }
7709: #endif

7711:   /* assemble new additional is (if any) */
7712:   if (nis) {
7713:     PetscInt **temp_idxs,*count_is,j,psum;

7715:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7716:     PetscCalloc1(nis,&count_is);
        /* first pass: count the indices received for each IS slot and in total */
7717:     ptr_idxs = recv_buffer_idxs_is;
7718:     psum = 0;
7719:     for (i=0;i<n_recvs;i++) {
7720:       for (j=0;j<nis;j++) {
7721:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7722:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7723:         psum += plen;
7724:         ptr_idxs += plen+1; /* shift pointer to received data */
7725:       }
7726:     }
        /* one contiguous allocation of psum entries; temp_idxs[j] points at the start of the j-th slot */
7727:     PetscMalloc1(nis,&temp_idxs);
7728:     PetscMalloc1(psum,&temp_idxs[0]);
7729:     for (i=1;i<nis;i++) {
7730:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7731:     }
        /* second pass: count_is is reset and reused as the write offset inside each slot */
7732:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7733:     ptr_idxs = recv_buffer_idxs_is;
7734:     for (i=0;i<n_recvs;i++) {
7735:       for (j=0;j<nis;j++) {
7736:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7737:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7738:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7739:         ptr_idxs += plen+1; /* shift pointer to received data */
7740:       }
7741:     }
        /* replace each incoming IS with one created on comm_n from the gathered indices */
7742:     for (i=0;i<nis;i++) {
7743:       ISDestroy(&isarray[i]);
7744:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7745:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7746:     }
7747:     PetscFree(count_is);
7748:     PetscFree(temp_idxs[0]);
7749:     PetscFree(temp_idxs);
7750:   }
7751:   /* free workspace */
      /* each send buffer is released only after the corresponding sends have completed */
7752:   PetscFree(recv_buffer_idxs_is);
7753:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7754:   PetscFree(send_buffer_idxs);
7755:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7756:   if (isdense) {
7757:     MatISGetLocalMat(mat,&local_mat);
7758:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7759:     MatISRestoreLocalMat(mat,&local_mat);
7760:   } else {
7761:     /* PetscFree(send_buffer_vals); */
7762:   }
7763:   if (nis) {
7764:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7765:     PetscFree(send_buffer_idxs_is);
7766:   }

7768:   if (nvecs) {
7769:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7770:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7771:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
        /* the incoming vector is replaced by a new one of local size new_local_rows on comm_n */
7772:     VecDestroy(&nnsp_vec[0]);
7773:     VecCreate(comm_n,&nnsp_vec[0]);
7774:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7775:     VecSetType(nnsp_vec[0],VECSTANDARD);
7776:     /* set values */
7777:     ptr_vals = recv_buffer_vecs;
7778:     ptr_idxs = recv_buffer_idxs_local;
        /* send_buffer_vecs is reused here as the write pointer into the new vector.
           NOTE(review): entries are accumulated with += into the array of the freshly created vector;
           presumably that array is zero-initialized - confirm */
7779:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7780:     for (i=0;i<n_recvs;i++) {
7781:       PetscInt j;
7782:       for (j=0;j<*(ptr_idxs+1);j++) {
7783:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7784:       }
7785:       ptr_idxs += olengths_idxs[i];
7786:       ptr_vals += olengths_idxs[i]-2;
7787:     }
7788:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7789:     VecAssemblyBegin(nnsp_vec[0]);
7790:     VecAssemblyEnd(nnsp_vec[0]);
7791:   }

7793:   PetscFree(recv_buffer_vecs);
7794:   PetscFree(recv_buffer_idxs_local);
7795:   PetscFree(recv_req_idxs);
7796:   PetscFree(recv_req_vals);
7797:   PetscFree(recv_req_vecs);
7798:   PetscFree(recv_req_idxs_is);
7799:   PetscFree(send_req_idxs);
7800:   PetscFree(send_req_vals);
7801:   PetscFree(send_req_vecs);
7802:   PetscFree(send_req_idxs_is);
7803:   PetscFree(ilengths_vals);
7804:   PetscFree(ilengths_idxs);
7805:   PetscFree(olengths_vals);
7806:   PetscFree(olengths_idxs);
7807:   PetscFree(onodes);
7808:   if (nis) {
7809:     PetscFree(ilengths_idxs_is);
7810:     PetscFree(olengths_idxs_is);
7811:     PetscFree(onodes_is);
7812:   }
7813:   PetscSubcommDestroy(&subcomm);
7814:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7815:     MatDestroy(mat_n);
7816:     for (i=0;i<nis;i++) {
7817:       ISDestroy(&isarray[i]);
7818:     }
7819:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7820:       VecDestroy(&nnsp_vec[0]);
7821:     }
7822:     *mat_n = NULL;
7823:   }
7824:   return(0);
7825: }

7827: /* temporary hack into ksp private data structure */
7828:  #include <petsc/private/kspimpl.h>

7830: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7831: {
7832:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7833:   PC_IS                  *pcis = (PC_IS*)pc->data;
7834:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7835:   Mat                    coarsedivudotp = NULL;
7836:   Mat                    coarseG,t_coarse_mat_is;
7837:   MatNullSpace           CoarseNullSpace = NULL;
7838:   ISLocalToGlobalMapping coarse_islg;
7839:   IS                     coarse_is,*isarray;
7840:   PetscInt               i,im_active=-1,active_procs=-1;
7841:   PetscInt               nis,nisdofs,nisneu,nisvert;
7842:   PC                     pc_temp;
7843:   PCType                 coarse_pc_type;
7844:   KSPType                coarse_ksp_type;
7845:   PetscBool              multilevel_requested,multilevel_allowed;
7846:   PetscBool              coarse_reuse;
7847:   PetscInt               ncoarse,nedcfield;
7848:   PetscBool              compute_vecs = PETSC_FALSE;
7849:   PetscScalar            *array;
7850:   MatReuse               coarse_mat_reuse;
7851:   PetscBool              restr, full_restr, have_void;
7852:   PetscMPIInt            commsize;
7853:   PetscErrorCode         ierr;

7856:   /* Assign global numbering to coarse dofs */
7857:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7858:     PetscInt ocoarse_size;
7859:     compute_vecs = PETSC_TRUE;

7861:     pcbddc->new_primal_space = PETSC_TRUE;
7862:     ocoarse_size = pcbddc->coarse_size;
7863:     PetscFree(pcbddc->global_primal_indices);
7864:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7865:     /* see if we can avoid some work */
7866:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7867:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7868:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7869:         KSPReset(pcbddc->coarse_ksp);
7870:         coarse_reuse = PETSC_FALSE;
7871:       } else { /* we can safely reuse already computed coarse matrix */
7872:         coarse_reuse = PETSC_TRUE;
7873:       }
7874:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7875:       coarse_reuse = PETSC_FALSE;
7876:     }
7877:     /* reset any subassembling information */
7878:     if (!coarse_reuse || pcbddc->recompute_topography) {
7879:       ISDestroy(&pcbddc->coarse_subassembling);
7880:     }
7881:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7882:     coarse_reuse = PETSC_TRUE;
7883:   }
7884:   /* assemble coarse matrix */
7885:   if (coarse_reuse && pcbddc->coarse_ksp) {
7886:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7887:     PetscObjectReference((PetscObject)coarse_mat);
7888:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7889:   } else {
7890:     coarse_mat = NULL;
7891:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7892:   }

7894:   /* creates temporary l2gmap and IS for coarse indexes */
7895:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7896:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

7898:   /* creates temporary MATIS object for coarse matrix */
7899:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7900:   MatDenseGetArray(coarse_submat_dense,&array);
7901:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7902:   MatDenseRestoreArray(coarse_submat_dense,&array);
7903:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7904:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7905:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7906:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7907:   MatDestroy(&coarse_submat_dense);

7909:   /* count "active" (i.e. with positive local size) and "void" processes */
7910:   im_active = !!(pcis->n);
7911:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

7913:   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7914:   /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */
7915:   /* full_restr : just use the receivers from the subassembling pattern */
7916:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7917:   coarse_mat_is = NULL;
7918:   multilevel_allowed = PETSC_FALSE;
7919:   multilevel_requested = PETSC_FALSE;
7920:   pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7921:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7922:   if (multilevel_requested) {
7923:     ncoarse = active_procs/pcbddc->coarsening_ratio;
7924:     restr = PETSC_FALSE;
7925:     full_restr = PETSC_FALSE;
7926:   } else {
7927:     ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7928:     restr = PETSC_TRUE;
7929:     full_restr = PETSC_TRUE;
7930:   }
7931:   if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7932:   ncoarse = PetscMax(1,ncoarse);
7933:   if (!pcbddc->coarse_subassembling) {
7934:     if (pcbddc->coarsening_ratio > 1) {
7935:       if (multilevel_requested) {
7936:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7937:       } else {
7938:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7939:       }
7940:     } else {
7941:       PetscMPIInt rank;
7942:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7943:       have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7944:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7945:     }
7946:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7947:     PetscInt    psum;
7948:     if (pcbddc->coarse_ksp) psum = 1;
7949:     else psum = 0;
7950:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7951:     if (ncoarse < commsize) have_void = PETSC_TRUE;
7952:   }
7953:   /* determine if we can go multilevel */
7954:   if (multilevel_requested) {
7955:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7956:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7957:   }
7958:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7960:   /* dump subassembling pattern */
7961:   if (pcbddc->dbg_flag && multilevel_allowed) {
7962:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7963:   }

7965:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7966:   nedcfield = -1;
7967:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7968:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
7969:     const PetscInt         *idxs;
7970:     ISLocalToGlobalMapping tmap;

7972:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7973:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7974:     /* allocate space for temporary storage */
7975:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7976:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7977:     /* allocate for IS array */
7978:     nisdofs = pcbddc->n_ISForDofsLocal;
7979:     if (pcbddc->nedclocal) {
7980:       if (pcbddc->nedfield > -1) {
7981:         nedcfield = pcbddc->nedfield;
7982:       } else {
7983:         nedcfield = 0;
7984:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7985:         nisdofs = 1;
7986:       }
7987:     }
7988:     nisneu = !!pcbddc->NeumannBoundariesLocal;
7989:     nisvert = 0; /* nisvert is not used */
7990:     nis = nisdofs + nisneu + nisvert;
7991:     PetscMalloc1(nis,&isarray);
7992:     /* dofs splitting */
7993:     for (i=0;i<nisdofs;i++) {
7994:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7995:       if (nedcfield != i) {
7996:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7997:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7998:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7999:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8000:       } else {
8001:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8002:         ISGetIndices(pcbddc->nedclocal,&idxs);
8003:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8004:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
8005:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8006:       }
8007:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8008:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8009:       /* ISView(isarray[i],0); */
8010:     }
8011:     /* neumann boundaries */
8012:     if (pcbddc->NeumannBoundariesLocal) {
8013:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8014:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8015:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8016:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8017:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8018:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8019:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8020:       /* ISView(isarray[nisdofs],0); */
8021:     }
8022:     /* free memory */
8023:     PetscFree(tidxs);
8024:     PetscFree(tidxs2);
8025:     ISLocalToGlobalMappingDestroy(&tmap);
8026:   } else {
8027:     nis = 0;
8028:     nisdofs = 0;
8029:     nisneu = 0;
8030:     nisvert = 0;
8031:     isarray = NULL;
8032:   }
8033:   /* destroy no longer needed map */
8034:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8036:   /* subassemble */
8037:   if (multilevel_allowed) {
8038:     Vec       vp[1];
8039:     PetscInt  nvecs = 0;
8040:     PetscBool reuse,reuser;

8042:     if (coarse_mat) reuse = PETSC_TRUE;
8043:     else reuse = PETSC_FALSE;
8044:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8045:     vp[0] = NULL;
8046:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8047:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8048:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8049:       VecSetType(vp[0],VECSTANDARD);
8050:       nvecs = 1;

8052:       if (pcbddc->divudotp) {
8053:         Mat      B,loc_divudotp;
8054:         Vec      v,p;
8055:         IS       dummy;
8056:         PetscInt np;

8058:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8059:         MatGetSize(loc_divudotp,&np,NULL);
8060:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8061:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8062:         MatCreateVecs(B,&v,&p);
8063:         VecSet(p,1.);
8064:         MatMultTranspose(B,p,v);
8065:         VecDestroy(&p);
8066:         MatDestroy(&B);
8067:         VecGetArray(vp[0],&array);
8068:         VecPlaceArray(pcbddc->vec1_P,array);
8069:         VecRestoreArray(vp[0],&array);
8070:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8071:         VecResetArray(pcbddc->vec1_P);
8072:         ISDestroy(&dummy);
8073:         VecDestroy(&v);
8074:       }
8075:     }
8076:     if (reuser) {
8077:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8078:     } else {
8079:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8080:     }
8081:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8082:       PetscScalar *arraym,*arrayv;
8083:       PetscInt    nl;
8084:       VecGetLocalSize(vp[0],&nl);
8085:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8086:       MatDenseGetArray(coarsedivudotp,&arraym);
8087:       VecGetArray(vp[0],&arrayv);
8088:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8089:       VecRestoreArray(vp[0],&arrayv);
8090:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8091:       VecDestroy(&vp[0]);
8092:     } else {
8093:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8094:     }
8095:   } else {
8096:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8097:   }
8098:   if (coarse_mat_is || coarse_mat) {
8099:     PetscMPIInt size;
8100:     MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
8101:     if (!multilevel_allowed) {
8102:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
8103:     } else {
8104:       Mat A;

8106:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8107:       if (coarse_mat_is) {
8108:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8109:         PetscObjectReference((PetscObject)coarse_mat_is);
8110:         coarse_mat = coarse_mat_is;
8111:       }
8112:       /* be sure we don't have MatSeqDENSE as local mat */
8113:       MatISGetLocalMat(coarse_mat,&A);
8114:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8115:     }
8116:   }
8117:   MatDestroy(&t_coarse_mat_is);
8118:   MatDestroy(&coarse_mat_is);

8120:   /* create local to global scatters for coarse problem */
8121:   if (compute_vecs) {
8122:     PetscInt lrows;
8123:     VecDestroy(&pcbddc->coarse_vec);
8124:     if (coarse_mat) {
8125:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8126:     } else {
8127:       lrows = 0;
8128:     }
8129:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8130:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8131:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
8132:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8133:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8134:   }
8135:   ISDestroy(&coarse_is);

8137:   /* set defaults for coarse KSP and PC */
8138:   if (multilevel_allowed) {
8139:     coarse_ksp_type = KSPRICHARDSON;
8140:     coarse_pc_type = PCBDDC;
8141:   } else {
8142:     coarse_ksp_type = KSPPREONLY;
8143:     coarse_pc_type = PCREDUNDANT;
8144:   }

8146:   /* print some info if requested */
8147:   if (pcbddc->dbg_flag) {
8148:     if (!multilevel_allowed) {
8149:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8150:       if (multilevel_requested) {
8151:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8152:       } else if (pcbddc->max_levels) {
8153:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
8154:       }
8155:       PetscViewerFlush(pcbddc->dbg_viewer);
8156:     }
8157:   }

8159:   /* communicate coarse discrete gradient */
8160:   coarseG = NULL;
8161:   if (pcbddc->nedcG && multilevel_allowed) {
8162:     MPI_Comm ccomm;
8163:     if (coarse_mat) {
8164:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8165:     } else {
8166:       ccomm = MPI_COMM_NULL;
8167:     }
8168:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8169:   }

8171:   /* create the coarse KSP object only once with defaults */
8172:   if (coarse_mat) {
8173:     PetscBool   isredundant,isnn,isbddc;
8174:     PetscViewer dbg_viewer = NULL;

8176:     if (pcbddc->dbg_flag) {
8177:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8178:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8179:     }
8180:     if (!pcbddc->coarse_ksp) {
8181:       char   prefix[256],str_level[16];
8182:       size_t len;

8184:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8185:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8186:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8187:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8188:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8189:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8190:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8191:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8192:       /* TODO is this logic correct? should check for coarse_mat type */
8193:       PCSetType(pc_temp,coarse_pc_type);
8194:       /* prefix */
8195:       PetscStrcpy(prefix,"");
8196:       PetscStrcpy(str_level,"");
8197:       if (!pcbddc->current_level) {
8198:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8199:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8200:       } else {
8201:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8202:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8203:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8204:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8205:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8206:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8207:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8208:       }
8209:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8210:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8211:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8212:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8213:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8214:       /* allow user customization */
8215:       KSPSetFromOptions(pcbddc->coarse_ksp);
8216:     }
8217:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8218:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8219:     if (nisdofs) {
8220:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8221:       for (i=0;i<nisdofs;i++) {
8222:         ISDestroy(&isarray[i]);
8223:       }
8224:     }
8225:     if (nisneu) {
8226:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8227:       ISDestroy(&isarray[nisdofs]);
8228:     }
8229:     if (nisvert) {
8230:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8231:       ISDestroy(&isarray[nis-1]);
8232:     }
8233:     if (coarseG) {
8234:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8235:     }

8237:     /* get some info after set from options */
8238:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8239:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8240:     if (isbddc && !multilevel_allowed) {
8241:       PCSetType(pc_temp,coarse_pc_type);
8242:       isbddc = PETSC_FALSE;
8243:     }
8244:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8245:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8246:     if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8247:       PCSetType(pc_temp,PCBDDC);
8248:       isbddc = PETSC_TRUE;
8249:     }
8250:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8251:     if (isredundant) {
8252:       KSP inner_ksp;
8253:       PC  inner_pc;

8255:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8256:       KSPGetPC(inner_ksp,&inner_pc);
8257:     }

8259:     /* parameters which miss an API */
8260:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8261:     if (isbddc) {
8262:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8264:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8265:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8266:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8267:       if (pcbddc_coarse->benign_saddle_point) {
8268:         Mat                    coarsedivudotp_is;
8269:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8270:         IS                     row,col;
8271:         const PetscInt         *gidxs;
8272:         PetscInt               n,st,M,N;

8274:         MatGetSize(coarsedivudotp,&n,NULL);
8275:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8276:         st   = st-n;
8277:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8278:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8279:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8280:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8281:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8282:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8283:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8284:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8285:         ISGetSize(row,&M);
8286:         MatGetSize(coarse_mat,&N,NULL);
8287:         ISDestroy(&row);
8288:         ISDestroy(&col);
8289:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8290:         MatSetType(coarsedivudotp_is,MATIS);
8291:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8292:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8293:         ISLocalToGlobalMappingDestroy(&rl2g);
8294:         ISLocalToGlobalMappingDestroy(&cl2g);
8295:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8296:         MatDestroy(&coarsedivudotp);
8297:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8298:         MatDestroy(&coarsedivudotp_is);
8299:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8300:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8301:       }
8302:     }

8304:     /* propagate symmetry info of coarse matrix */
8305:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8306:     if (pc->pmat->symmetric_set) {
8307:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8308:     }
8309:     if (pc->pmat->hermitian_set) {
8310:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8311:     }
8312:     if (pc->pmat->spd_set) {
8313:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8314:     }
8315:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8316:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8317:     }
8318:     /* set operators */
8319:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8320:     if (pcbddc->dbg_flag) {
8321:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8322:     }
8323:   }
8324:   MatDestroy(&coarseG);
8325:   PetscFree(isarray);
8326: #if 0
8327:   {
8328:     PetscViewer viewer;
8329:     char filename[256];
8330:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8331:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8332:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8333:     MatView(coarse_mat,viewer);
8334:     PetscViewerPopFormat(viewer);
8335:     PetscViewerDestroy(&viewer);
8336:   }
8337: #endif

8339:   if (pcbddc->coarse_ksp) {
8340:     Vec crhs,csol;

8342:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8343:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8344:     if (!csol) {
8345:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8346:     }
8347:     if (!crhs) {
8348:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8349:     }
8350:   }
8351:   MatDestroy(&coarsedivudotp);

8353:   /* compute null space for coarse solver if the benign trick has been requested */
8354:   if (pcbddc->benign_null) {

8356:     VecSet(pcbddc->vec1_P,0.);
8357:     for (i=0;i<pcbddc->benign_n;i++) {
8358:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8359:     }
8360:     VecAssemblyBegin(pcbddc->vec1_P);
8361:     VecAssemblyEnd(pcbddc->vec1_P);
8362:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8363:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8364:     if (coarse_mat) {
8365:       Vec         nullv;
8366:       PetscScalar *array,*array2;
8367:       PetscInt    nl;

8369:       MatCreateVecs(coarse_mat,&nullv,NULL);
8370:       VecGetLocalSize(nullv,&nl);
8371:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8372:       VecGetArray(nullv,&array2);
8373:       PetscMemcpy(array2,array,nl*sizeof(*array));
8374:       VecRestoreArray(nullv,&array2);
8375:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8376:       VecNormalize(nullv,NULL);
8377:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8378:       VecDestroy(&nullv);
8379:     }
8380:   }

8382:   if (pcbddc->coarse_ksp) {
8383:     PetscBool ispreonly;

8385:     if (CoarseNullSpace) {
8386:       PetscBool isnull;
8387:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8388:       if (isnull) {
8389:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8390:       }
8391:       /* TODO: add local nullspaces (if any) */
8392:     }
8393:     /* setup coarse ksp */
8394:     KSPSetUp(pcbddc->coarse_ksp);
8395:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8396:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8397:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8398:       KSP       check_ksp;
8399:       KSPType   check_ksp_type;
8400:       PC        check_pc;
8401:       Vec       check_vec,coarse_vec;
8402:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8403:       PetscInt  its;
8404:       PetscBool compute_eigs;
8405:       PetscReal *eigs_r,*eigs_c;
8406:       PetscInt  neigs;
8407:       const char *prefix;

8409:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8410:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8411:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8412:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8413:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8414:       /* prevent from setup unneeded object */
8415:       KSPGetPC(check_ksp,&check_pc);
8416:       PCSetType(check_pc,PCNONE);
8417:       if (ispreonly) {
8418:         check_ksp_type = KSPPREONLY;
8419:         compute_eigs = PETSC_FALSE;
8420:       } else {
8421:         check_ksp_type = KSPGMRES;
8422:         compute_eigs = PETSC_TRUE;
8423:       }
8424:       KSPSetType(check_ksp,check_ksp_type);
8425:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8426:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8427:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8428:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8429:       KSPSetOptionsPrefix(check_ksp,prefix);
8430:       KSPAppendOptionsPrefix(check_ksp,"check_");
8431:       KSPSetFromOptions(check_ksp);
8432:       KSPSetUp(check_ksp);
8433:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8434:       KSPSetPC(check_ksp,check_pc);
8435:       /* create random vec */
8436:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8437:       VecSetRandom(check_vec,NULL);
8438:       MatMult(coarse_mat,check_vec,coarse_vec);
8439:       /* solve coarse problem */
8440:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8441:       /* set eigenvalue estimation if preonly has not been requested */
8442:       if (compute_eigs) {
8443:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8444:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8445:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8446:         if (neigs) {
8447:           lambda_max = eigs_r[neigs-1];
8448:           lambda_min = eigs_r[0];
8449:           if (pcbddc->use_coarse_estimates) {
8450:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8451:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8452:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8453:             }
8454:           }
8455:         }
8456:       }

8458:       /* check coarse problem residual error */
8459:       if (pcbddc->dbg_flag) {
8460:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8461:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8462:         VecAXPY(check_vec,-1.0,coarse_vec);
8463:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8464:         MatMult(coarse_mat,check_vec,coarse_vec);
8465:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8466:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8467:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8468:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8469:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8470:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8471:         if (CoarseNullSpace) {
8472:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8473:         }
8474:         if (compute_eigs) {
8475:           PetscReal          lambda_max_s,lambda_min_s;
8476:           KSPConvergedReason reason;
8477:           KSPGetType(check_ksp,&check_ksp_type);
8478:           KSPGetIterationNumber(check_ksp,&its);
8479:           KSPGetConvergedReason(check_ksp,&reason);
8480:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8481:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8482:           for (i=0;i<neigs;i++) {
8483:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8484:           }
8485:         }
8486:         PetscViewerFlush(dbg_viewer);
8487:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8488:       }
8489:       VecDestroy(&check_vec);
8490:       VecDestroy(&coarse_vec);
8491:       KSPDestroy(&check_ksp);
8492:       if (compute_eigs) {
8493:         PetscFree(eigs_r);
8494:         PetscFree(eigs_c);
8495:       }
8496:     }
8497:   }
8498:   MatNullSpaceDestroy(&CoarseNullSpace);
8499:   /* print additional info */
8500:   if (pcbddc->dbg_flag) {
8501:     /* waits until all processes reaches this point */
8502:     PetscBarrier((PetscObject)pc);
8503:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8504:     PetscViewerFlush(pcbddc->dbg_viewer);
8505:   }

8507:   /* free memory */
8508:   MatDestroy(&coarse_mat);
8509:   return(0);
8510: }

8512: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8513: {
8514:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8515:   PC_IS*         pcis = (PC_IS*)pc->data;
8516:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8517:   IS             subset,subset_mult,subset_n;
8518:   PetscInt       local_size,coarse_size=0;
8519:   PetscInt       *local_primal_indices=NULL;
8520:   const PetscInt *t_local_primal_indices;

8524:   /* Compute global number of coarse dofs */
8525:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8526:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8527:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8528:   ISDestroy(&subset_n);
8529:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8530:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8531:   ISDestroy(&subset);
8532:   ISDestroy(&subset_mult);
8533:   ISGetLocalSize(subset_n,&local_size);
8534:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8535:   PetscMalloc1(local_size,&local_primal_indices);
8536:   ISGetIndices(subset_n,&t_local_primal_indices);
8537:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8538:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8539:   ISDestroy(&subset_n);

8541:   /* check numbering */
8542:   if (pcbddc->dbg_flag) {
8543:     PetscScalar coarsesum,*array,*array2;
8544:     PetscInt    i;
8545:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8547:     PetscViewerFlush(pcbddc->dbg_viewer);
8548:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8549:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8550:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8551:     /* counter */
8552:     VecSet(pcis->vec1_global,0.0);
8553:     VecSet(pcis->vec1_N,1.0);
8554:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8555:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8556:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8557:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8558:     VecSet(pcis->vec1_N,0.0);
8559:     for (i=0;i<pcbddc->local_primal_size;i++) {
8560:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8561:     }
8562:     VecAssemblyBegin(pcis->vec1_N);
8563:     VecAssemblyEnd(pcis->vec1_N);
8564:     VecSet(pcis->vec1_global,0.0);
8565:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8566:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8567:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8568:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8569:     VecGetArray(pcis->vec1_N,&array);
8570:     VecGetArray(pcis->vec2_N,&array2);
8571:     for (i=0;i<pcis->n;i++) {
8572:       if (array[i] != 0.0 && array[i] != array2[i]) {
8573:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8574:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8575:         set_error = PETSC_TRUE;
8576:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8577:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);
8578:       }
8579:     }
8580:     VecRestoreArray(pcis->vec2_N,&array2);
8581:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8582:     PetscViewerFlush(pcbddc->dbg_viewer);
8583:     for (i=0;i<pcis->n;i++) {
8584:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8585:     }
8586:     VecRestoreArray(pcis->vec1_N,&array);
8587:     VecSet(pcis->vec1_global,0.0);
8588:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8589:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8590:     VecSum(pcis->vec1_global,&coarsesum);
8591:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8592:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8593:       PetscInt *gidxs;

8595:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8596:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8597:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8598:       PetscViewerFlush(pcbddc->dbg_viewer);
8599:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8600:       for (i=0;i<pcbddc->local_primal_size;i++) {
8601:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8602:       }
8603:       PetscViewerFlush(pcbddc->dbg_viewer);
8604:       PetscFree(gidxs);
8605:     }
8606:     PetscViewerFlush(pcbddc->dbg_viewer);
8607:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8608:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8609:   }
8610:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8611:   /* get back data */
8612:   *coarse_size_n = coarse_size;
8613:   *local_primal_indices_n = local_primal_indices;
8614:   return(0);
8615: }

8617: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8618: {
8619:   IS             localis_t;
8620:   PetscInt       i,lsize,*idxs,n;
8621:   PetscScalar    *vals;

8625:   /* get indices in local ordering exploiting local to global map */
8626:   ISGetLocalSize(globalis,&lsize);
8627:   PetscMalloc1(lsize,&vals);
8628:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8629:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8630:   VecSet(gwork,0.0);
8631:   VecSet(lwork,0.0);
8632:   if (idxs) { /* multilevel guard */
8633:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8634:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8635:   }
8636:   VecAssemblyBegin(gwork);
8637:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8638:   PetscFree(vals);
8639:   VecAssemblyEnd(gwork);
8640:   /* now compute set in local ordering */
8641:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8642:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8643:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8644:   VecGetSize(lwork,&n);
8645:   for (i=0,lsize=0;i<n;i++) {
8646:     if (PetscRealPart(vals[i]) > 0.5) {
8647:       lsize++;
8648:     }
8649:   }
8650:   PetscMalloc1(lsize,&idxs);
8651:   for (i=0,lsize=0;i<n;i++) {
8652:     if (PetscRealPart(vals[i]) > 0.5) {
8653:       idxs[lsize++] = i;
8654:     }
8655:   }
8656:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8657:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8658:   *localis = localis_t;
8659:   return(0);
8660: }

8662: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8663: {
8664:   PC_IS               *pcis=(PC_IS*)pc->data;
8665:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8666:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8667:   Mat                 S_j;
8668:   PetscInt            *used_xadj,*used_adjncy;
8669:   PetscBool           free_used_adj;
8670:   PetscErrorCode      ierr;

8673:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8674:   free_used_adj = PETSC_FALSE;
8675:   if (pcbddc->sub_schurs_layers == -1) {
8676:     used_xadj = NULL;
8677:     used_adjncy = NULL;
8678:   } else {
8679:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8680:       used_xadj = pcbddc->mat_graph->xadj;
8681:       used_adjncy = pcbddc->mat_graph->adjncy;
8682:     } else if (pcbddc->computed_rowadj) {
8683:       used_xadj = pcbddc->mat_graph->xadj;
8684:       used_adjncy = pcbddc->mat_graph->adjncy;
8685:     } else {
8686:       PetscBool      flg_row=PETSC_FALSE;
8687:       const PetscInt *xadj,*adjncy;
8688:       PetscInt       nvtxs;

8690:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8691:       if (flg_row) {
8692:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8693:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8694:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8695:         free_used_adj = PETSC_TRUE;
8696:       } else {
8697:         pcbddc->sub_schurs_layers = -1;
8698:         used_xadj = NULL;
8699:         used_adjncy = NULL;
8700:       }
8701:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8702:     }
8703:   }

8705:   /* setup sub_schurs data */
8706:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8707:   if (!sub_schurs->schur_explicit) {
8708:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8709:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8710:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8711:   } else {
8712:     Mat       change = NULL;
8713:     Vec       scaling = NULL;
8714:     IS        change_primal = NULL, iP;
8715:     PetscInt  benign_n;
8716:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8717:     PetscBool isseqaij,need_change = PETSC_FALSE;
8718:     PetscBool discrete_harmonic = PETSC_FALSE;

8720:     if (!pcbddc->use_vertices && reuse_solvers) {
8721:       PetscInt n_vertices;

8723:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8724:       reuse_solvers = (PetscBool)!n_vertices;
8725:     }
8726:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8727:     if (!isseqaij) {
8728:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8729:       if (matis->A == pcbddc->local_mat) {
8730:         MatDestroy(&pcbddc->local_mat);
8731:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8732:       } else {
8733:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8734:       }
8735:     }
8736:     if (!pcbddc->benign_change_explicit) {
8737:       benign_n = pcbddc->benign_n;
8738:     } else {
8739:       benign_n = 0;
8740:     }
8741:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8742:        We need a global reduction to avoid possible deadlocks.
8743:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8744:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8745:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8746:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8747:       need_change = (PetscBool)(!need_change);
8748:     }
8749:     /* If the user defines additional constraints, we import them here.
8750:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8751:     if (need_change) {
8752:       PC_IS   *pcisf;
8753:       PC_BDDC *pcbddcf;
8754:       PC      pcf;

8756:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
8757:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8758:       PCSetOperators(pcf,pc->mat,pc->pmat);
8759:       PCSetType(pcf,PCBDDC);

8761:       /* hacks */
8762:       pcisf                        = (PC_IS*)pcf->data;
8763:       pcisf->is_B_local            = pcis->is_B_local;
8764:       pcisf->vec1_N                = pcis->vec1_N;
8765:       pcisf->BtoNmap               = pcis->BtoNmap;
8766:       pcisf->n                     = pcis->n;
8767:       pcisf->n_B                   = pcis->n_B;
8768:       pcbddcf                      = (PC_BDDC*)pcf->data;
8769:       PetscFree(pcbddcf->mat_graph);
8770:       pcbddcf->mat_graph           = pcbddc->mat_graph;
8771:       pcbddcf->use_faces           = PETSC_TRUE;
8772:       pcbddcf->use_change_of_basis = PETSC_TRUE;
8773:       pcbddcf->use_change_on_faces = PETSC_TRUE;
8774:       pcbddcf->use_qr_single       = PETSC_TRUE;
8775:       pcbddcf->fake_change         = PETSC_TRUE;

8777:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8778:       PCBDDCConstraintsSetUp(pcf);
8779:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
8780:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
8781:       change = pcbddcf->ConstraintMatrix;
8782:       pcbddcf->ConstraintMatrix = NULL;

8784:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8785:       PetscFree(pcbddcf->sub_schurs);
8786:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8787:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8788:       PetscFree(pcbddcf->primal_indices_local_idxs);
8789:       PetscFree(pcbddcf->onearnullvecs_state);
8790:       PetscFree(pcf->data);
8791:       pcf->ops->destroy = NULL;
8792:       pcf->ops->reset   = NULL;
8793:       PCDestroy(&pcf);
8794:     }
8795:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

8797:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8798:     if (iP) {
8799:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8800:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8801:       PetscOptionsEnd();
8802:     }
8803:     if (discrete_harmonic) {
8804:       Mat A;
8805:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8806:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8807:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8808:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8809:       MatDestroy(&A);
8810:     } else {
8811:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8812:     }
8813:     MatDestroy(&change);
8814:     ISDestroy(&change_primal);
8815:   }
8816:   MatDestroy(&S_j);

8818:   /* free adjacency */
8819:   if (free_used_adj) {
8820:     PetscFree2(used_xadj,used_adjncy);
8821:   }
8822:   return(0);
8823: }

8825: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8826: {
8827:   PC_IS               *pcis=(PC_IS*)pc->data;
8828:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8829:   PCBDDCGraph         graph;
8830:   PetscErrorCode      ierr;

8833:   /* attach interface graph for determining subsets */
8834:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8835:     IS       verticesIS,verticescomm;
8836:     PetscInt vsize,*idxs;

8838:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8839:     ISGetSize(verticesIS,&vsize);
8840:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8841:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8842:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8843:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8844:     PCBDDCGraphCreate(&graph);
8845:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8846:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8847:     ISDestroy(&verticescomm);
8848:     PCBDDCGraphComputeConnectedComponents(graph);
8849:   } else {
8850:     graph = pcbddc->mat_graph;
8851:   }
8852:   /* print some info */
8853:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8854:     IS       vertices;
8855:     PetscInt nv,nedges,nfaces;
8856:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8857:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8858:     ISGetSize(vertices,&nv);
8859:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8860:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
8861:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8862:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8863:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8864:     PetscViewerFlush(pcbddc->dbg_viewer);
8865:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8866:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8867:   }

8869:   /* sub_schurs init */
8870:   if (!pcbddc->sub_schurs) {
8871:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8872:   }
8873:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

8875:   /* free graph struct */
8876:   if (pcbddc->sub_schurs_rebuild) {
8877:     PCBDDCGraphDestroy(&graph);
8878:   }
8879:   return(0);
8880: }

8882: PetscErrorCode PCBDDCCheckOperator(PC pc)
8883: {
8884:   PC_IS               *pcis=(PC_IS*)pc->data;
8885:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8886:   PetscErrorCode      ierr;

8889:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8890:     IS             zerodiag = NULL;
8891:     Mat            S_j,B0_B=NULL;
8892:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
8893:     PetscScalar    *p0_check,*array,*array2;
8894:     PetscReal      norm;
8895:     PetscInt       i;

8897:     /* B0 and B0_B */
8898:     if (zerodiag) {
8899:       IS       dummy;

8901:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8902:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8903:       MatCreateVecs(B0_B,NULL,&dummy_vec);
8904:       ISDestroy(&dummy);
8905:     }
8906:     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8907:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8908:     VecSet(pcbddc->vec1_P,1.0);
8909:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8910:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8911:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8912:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8913:     VecReciprocal(vec_scale_P);
8914:     /* S_j */
8915:     MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8916:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

8918:     /* mimic vector in \widetilde{W}_\Gamma */
8919:     VecSetRandom(pcis->vec1_N,NULL);
8920:     /* continuous in primal space */
8921:     VecSetRandom(pcbddc->coarse_vec,NULL);
8922:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8923:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8924:     VecGetArray(pcbddc->vec1_P,&array);
8925:     PetscCalloc1(pcbddc->benign_n,&p0_check);
8926:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8927:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8928:     VecRestoreArray(pcbddc->vec1_P,&array);
8929:     VecAssemblyBegin(pcis->vec1_N);
8930:     VecAssemblyEnd(pcis->vec1_N);
8931:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8932:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8933:     VecDuplicate(pcis->vec2_B,&vec_check_B);
8934:     VecCopy(pcis->vec2_B,vec_check_B);

8936:     /* assemble rhs for coarse problem */
8937:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8938:     /* local with Schur */
8939:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8940:     if (zerodiag) {
8941:       VecGetArray(dummy_vec,&array);
8942:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8943:       VecRestoreArray(dummy_vec,&array);
8944:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8945:     }
8946:     /* sum on primal nodes the local contributions */
8947:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8948:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8949:     VecGetArray(pcis->vec1_N,&array);
8950:     VecGetArray(pcbddc->vec1_P,&array2);
8951:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8952:     VecRestoreArray(pcbddc->vec1_P,&array2);
8953:     VecRestoreArray(pcis->vec1_N,&array);
8954:     VecSet(pcbddc->coarse_vec,0.);
8955:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8956:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8957:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8958:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8959:     VecGetArray(pcbddc->vec1_P,&array);
8960:     /* scale primal nodes (BDDC sums contibutions) */
8961:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8962:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8963:     VecRestoreArray(pcbddc->vec1_P,&array);
8964:     VecAssemblyBegin(pcis->vec1_N);
8965:     VecAssemblyEnd(pcis->vec1_N);
8966:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8967:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8968:     /* global: \widetilde{B0}_B w_\Gamma */
8969:     if (zerodiag) {
8970:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
8971:       VecGetArray(dummy_vec,&array);
8972:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8973:       VecRestoreArray(dummy_vec,&array);
8974:     }
8975:     /* BDDC */
8976:     VecSet(pcis->vec1_D,0.);
8977:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

8979:     VecCopy(pcis->vec1_B,pcis->vec2_B);
8980:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8981:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8982:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8983:     for (i=0;i<pcbddc->benign_n;i++) {
8984:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8985:     }
8986:     PetscFree(p0_check);
8987:     VecDestroy(&vec_scale_P);
8988:     VecDestroy(&vec_check_B);
8989:     VecDestroy(&dummy_vec);
8990:     MatDestroy(&S_j);
8991:     MatDestroy(&B0_B);
8992:   }
8993:   return(0);
8994: }

8996:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
8997: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8998: {
8999:   Mat            At;
9000:   IS             rows;
9001:   PetscInt       rst,ren;
9003:   PetscLayout    rmap;

9006:   rst = ren = 0;
9007:   if (ccomm != MPI_COMM_NULL) {
9008:     PetscLayoutCreate(ccomm,&rmap);
9009:     PetscLayoutSetSize(rmap,A->rmap->N);
9010:     PetscLayoutSetBlockSize(rmap,1);
9011:     PetscLayoutSetUp(rmap);
9012:     PetscLayoutGetRange(rmap,&rst,&ren);
9013:   }
9014:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9015:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9016:   ISDestroy(&rows);

9018:   if (ccomm != MPI_COMM_NULL) {
9019:     Mat_MPIAIJ *a,*b;
9020:     IS         from,to;
9021:     Vec        gvec;
9022:     PetscInt   lsize;

9024:     MatCreate(ccomm,B);
9025:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9026:     MatSetType(*B,MATAIJ);
9027:     PetscLayoutDestroy(&((*B)->rmap));
9028:     PetscLayoutSetUp((*B)->cmap);
9029:     a    = (Mat_MPIAIJ*)At->data;
9030:     b    = (Mat_MPIAIJ*)(*B)->data;
9031:     MPI_Comm_size(ccomm,&b->size);
9032:     MPI_Comm_rank(ccomm,&b->rank);
9033:     PetscObjectReference((PetscObject)a->A);
9034:     PetscObjectReference((PetscObject)a->B);
9035:     b->A = a->A;
9036:     b->B = a->B;

9038:     b->donotstash      = a->donotstash;
9039:     b->roworiented     = a->roworiented;
9040:     b->rowindices      = 0;
9041:     b->rowvalues       = 0;
9042:     b->getrowactive    = PETSC_FALSE;

9044:     (*B)->rmap         = rmap;
9045:     (*B)->factortype   = A->factortype;
9046:     (*B)->assembled    = PETSC_TRUE;
9047:     (*B)->insertmode   = NOT_SET_VALUES;
9048:     (*B)->preallocated = PETSC_TRUE;

9050:     if (a->colmap) {
9051: #if defined(PETSC_USE_CTABLE)
9052:       PetscTableCreateCopy(a->colmap,&b->colmap);
9053: #else
9054:       PetscMalloc1(At->cmap->N,&b->colmap);
9055:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9056:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9057: #endif
9058:     } else b->colmap = 0;
9059:     if (a->garray) {
9060:       PetscInt len;
9061:       len  = a->B->cmap->n;
9062:       PetscMalloc1(len+1,&b->garray);
9063:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9064:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9065:     } else b->garray = 0;

9067:     PetscObjectReference((PetscObject)a->lvec);
9068:     b->lvec = a->lvec;
9069:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9071:     /* cannot use VecScatterCopy */
9072:     VecGetLocalSize(b->lvec,&lsize);
9073:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9074:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9075:     MatCreateVecs(*B,&gvec,NULL);
9076:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9077:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9078:     ISDestroy(&from);
9079:     ISDestroy(&to);
9080:     VecDestroy(&gvec);
9081:   }
9082:   MatDestroy(&At);
9083:   return(0);
9084: }