Actual source code: bddcprivate.c

petsc-3.8.3 2017-12-09
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <petscdmplex.h>
  5:  #include <petscblaslapack.h>
  6:  #include <petsc/private/sfimpl.h>
  7:  #include <petsc/private/dmpleximpl.h>

  9: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); /* forward declaration of a file-local helper; its definition is not part of this excerpt */

 11: /* if range is true,  it returns B s.t. span{B} = range(A)
 12:    if range is false, it returns B s.t. range(B) _|_ range(A)

        The basis is taken from the left singular vectors U computed by LAPACK GESVD
        (job "A": all nr columns of U are requested; job "N": no right singular vectors).

        Input:
          A     - sequential dense matrix (nr x nc); its raw array is handed to GESVD as the
                  in/out matrix. NOTE(review): LAPACK documents that GESVD destroys the
                  contents of that array, so A should be treated as scratch after the call.
          range - PETSC_TRUE: B gets the first i columns of U;
                  PETSC_FALSE: B gets the remaining nr-i columns of U,
                  where i is the index of the first singular value below PETSC_SMALL
                  (i = min(nr,nc) if there is none).
          lw    - number of entries of work; only read when work is not NULL, and passed to
                  GESVD as LWORK without any size check.
          work  - optional GESVD workspace; if NULL a buffer of
                  max(1, 5*min(nr,nc), 3*min(nr,nc)+max(nr,nc)) scalars is allocated and freed here.
          rwork - optional storage for the singular values (at least min(nr,nc) entries);
                  if NULL it is allocated and freed here.

        Output:
          B     - newly created MATSEQDENSE matrix with nr rows (the caller destroys it).
                  It is NOT created when A has zero rows or zero columns: in that case the
                  routine returns 0 immediately and leaves *B untouched.

        Only available for real scalars with LAPACK GESVD; otherwise PETSC_ERR_SUP is raised. */
 13: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 14: {
 15: #if !defined(PETSC_USE_COMPLEX)
      /* ds and di are dummy VT / LDVT arguments: right singular vectors are not requested */
 16:   PetscScalar    *uwork,*data,*U, ds = 0.;
 17:   PetscReal      *sing;
 18:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 19:   PetscInt       ulw,i,nr,nc,n;

 23: #if defined(PETSC_MISSING_LAPACK_GESVD)
 24:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 25: #else
 26:   MatGetSize(A,&nr,&nc);
      /* nothing to decompose: return without creating *B */
 27:   if (!nr || !nc) return(0);

 29:   /* workspace */
 30:   if (!work) {
        /* same expression as the minimal LWORK given in the LAPACK GESVD documentation */
 31:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 32:     PetscMalloc1(ulw,&uwork);
 33:   } else {
        /* caller-provided workspace: its length lw is trusted as is */
 34:     ulw   = lw;
 35:     uwork = work;
 36:   }
      /* n = number of singular values returned by GESVD */
 37:   n = PetscMin(nr,nc);
 38:   if (!rwork) {
 39:     PetscMalloc1(n,&sing);
 40:   } else {
        /* despite its name, rwork is used to hold the singular values */
 41:     sing = rwork;
 42:   }

 44:   /* SVD */
      /* U holds all nr left singular vectors, stored by columns with leading dimension nr */
 45:   PetscMalloc1(nr*nr,&U);
 46:   PetscBLASIntCast(nr,&bM);
 47:   PetscBLASIntCast(nc,&bN);
 48:   PetscBLASIntCast(ulw,&lwork);
 49:   MatDenseGetArray(A,&data);
      /* floating point traps are switched off around the LAPACK call and restored right after */
 50:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 51:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 52:   PetscFPTrapPop();
      /* NOTE(review): SETERRQ1 presumably returns right away, in which case A's array is not
         restored and U / sing / uwork are not freed on this failure path - confirm */
 53:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 54:   MatDenseRestoreArray(A,&data);
      /* after the loop i is the index of the first singular value below PETSC_SMALL (or n);
         it is used as the numerical rank, relying on LAPACK returning the values in
         decreasing order */
 55:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
      /* release only the buffers allocated by this routine */
 56:   if (!rwork) {
 57:     PetscFree(sing);
 58:   }
 59:   if (!work) {
 60:     PetscFree(uwork);
 61:   }
 62:   /* create B */
 63:   if (!range) {
        /* complement: columns i..nr-1 of U (nr-i columns of length nr) */
 64:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 65:     MatDenseGetArray(*B,&data);
 66:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 67:   } else {
        /* range: columns 0..i-1 of U */
 68:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 69:     MatDenseGetArray(*B,&data);
 70:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 71:   }
 72:   MatDenseRestoreArray(*B,&data);
 73:   PetscFree(U);
 74: #endif
 75: #else /* PETSC_USE_COMPLEX */
 77:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 78: #endif
 79:   return(0);
 80: }

 82: /* TODO REMOVE */
 /* Debug-only file-scope state, compiled in only when PRINT_GDET is defined.
    Both values are used to build the "Gdet_l%d_r%d_cc%d.m" dump file name in
    PCBDDCComputeNedelecChangeEdge. */
 83: #if defined(PRINT_GDET)
 84: static int inc = 0; /* running dump index, post-incremented at every dump */
 85: static int lev = 0; /* level tag written in the file name; never modified in the code visible here */
 86: #endif

 /* Builds, for one edge, the dense blocks extracted from the local matrix lG.

    Input:
      lG      - local matrix from which the submatrices are extracted
      edge    - row indices of the edge; if it is empty the routine returns 0
                immediately and none of the outputs is touched
      extrow  - row indices used for Gins
      extcol  - column indices used for both Gins and the edge block
      corners - optional column indices; when not NULL, cvals is filled
      work    - scalar buffer, used as three consecutive chunks:
                  [0, 5*esize)            SVD workspace (passed on with lw = 5*esize)
                  next rsize*csize        storage backing *Gins
                  next esize*csize        storage backing the temporary edge block
                so it must hold at least 5*esize + rsize*csize + esize*csize entries,
                with esize, rsize, csize the sizes of edge, extrow, extcol
      rwork   - forwarded to MatDenseOrthogonalRangeOrComplement, which stores the
                singular values there (min(esize,csize) entries) or allocates its own
                storage if it is NULL

    Output:
      Gins    - dense copy of lG(extrow,extcol). It is created on top of the work buffer,
                so presumably it is usable only as long as work is alive - confirm against callers
      GKins   - result of MatDenseOrthogonalRangeOrComplement(...,PETSC_FALSE,...) applied to
                the dense copy of lG(edge,extcol), i.e. per that routine's contract a matrix
                whose range is orthogonal to the range of that block
      cvals   - only when corners is not NULL: the first two entries of the array of
                lG(edge,corners)^T * GKins, divided by v (v is fixed to 1)

    NOTE(review): if extcol is empty while edge is not, MatDenseOrthogonalRangeOrComplement
    returns without creating *GKins, yet *GKins is dereferenced below when corners is set - confirm
    that callers never hit this case. */
 88: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 89: {
 91:   Mat            GE,GEd;
 92:   PetscInt       rsize,csize,esize;
 93:   PetscScalar    *ptr;

 96:   ISGetSize(edge,&esize);
      /* empty edge: nothing to compute, outputs are left untouched */
 97:   if (!esize) return(0);
 98:   ISGetSize(extrow,&rsize);
 99:   ISGetSize(extcol,&csize);

101:   /* gradients */
      /* skip the first 5*esize scalars: they are reserved as SVD workspace below */
102:   ptr  = work + 5*esize;
103:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
      /* Gins is a dense matrix created on top of the caller's buffer; the conversion with
         MAT_REUSE_MATRIX then fills it with the values of the extracted submatrix */
104:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
105:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
106:   MatDestroy(&GE);

108:   /* constants */
      /* advance past the storage of Gins; the next esize*csize scalars back GEd */
109:   ptr += rsize*csize;
110:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
111:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
112:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
113:   MatDestroy(&GE);
      /* range = PETSC_FALSE: GKins receives the orthogonal complement; the first 5*esize
         entries of work are the SVD workspace and do not overlap with Gins or GEd */
114:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
115:   MatDestroy(&GEd);

117:   if (corners) {
118:     Mat            GEc;
119:     PetscScalar    *vals,v;

        /* GEd is reused here for the product GEc^T * GKins */
121:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
122:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
123:     MatDenseGetArray(GEd,&vals);
        /* the normalization by |vals[0]| is disabled: v is fixed to 1, so the divisions and
           the MatScale below leave the values unchanged */
124:     /* v    = PetscAbsScalar(vals[0]) */;
125:     v    = 1.;
        /* assumes the product has at least two entries - TODO confirm */
126:     cvals[0] = vals[0]/v;
127:     cvals[1] = vals[1]/v;
128:     MatDenseRestoreArray(GEd,&vals);
129:     MatScale(*GKins,1./v);
130: #if defined(PRINT_GDET)
        /* debug dump of GEc, GKins and their product in MATLAB format, one file per call */
131:     {
132:       PetscViewer viewer;
133:       char filename[256];
134:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
135:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
136:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
137:       PetscObjectSetName((PetscObject)GEc,"GEc");
138:       MatView(GEc,viewer);
139:       PetscObjectSetName((PetscObject)(*GKins),"GK");
140:       MatView(*GKins,viewer);
141:       PetscObjectSetName((PetscObject)GEd,"Gproj");
142:       MatView(GEd,viewer);
143:       PetscViewerDestroy(&viewer);
144:     }
145: #endif
146:     MatDestroy(&GEd);
147:     MatDestroy(&GEc);
148:   }

150:   return(0);
151: }

153: PetscErrorCode PCBDDCNedelecSupport(PC pc)
154: {
155:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
156:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
157:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
158:   Vec                    tvec;
159:   PetscSF                sfv;
160:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
161:   MPI_Comm               comm;
162:   IS                     lned,primals,allprimals,nedfieldlocal;
163:   IS                     *eedges,*extrows,*extcols,*alleedges;
164:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
165:   PetscScalar            *vals,*work;
166:   PetscReal              *rwork;
167:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
168:   PetscInt               ne,nv,Lv,order,n,field;
169:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
170:   PetscInt               i,j,extmem,cum,maxsize,nee;
171:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
172:   PetscInt               *sfvleaves,*sfvroots;
173:   PetscInt               *corners,*cedges;
174:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
175: #if defined(PETSC_USE_DEBUG)
176:   PetscInt               *emarks;
177: #endif
178:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
179:   PetscErrorCode         ierr;

182:   /* If the discrete gradient is defined for a subset of dofs and global is true,
183:      it assumes G is given in global ordering for all the dofs.
184:      Otherwise, the ordering is global for the Nedelec field */
185:   order      = pcbddc->nedorder;
186:   conforming = pcbddc->conforming;
187:   field      = pcbddc->nedfield;
188:   global     = pcbddc->nedglobal;
189:   setprimal  = PETSC_FALSE;
190:   print      = PETSC_FALSE;
191:   singular   = PETSC_FALSE;

193:   /* Command line customization */
194:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
195:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
196:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
197:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
198:   /* print debug info TODO: to be removed */
199:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
200:   PetscOptionsEnd();

202:   /* Return if there are no edges in the decomposition and the problem is not singular */
203:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
204:   ISLocalToGlobalMappingGetSize(al2g,&n);
205:   PetscObjectGetComm((PetscObject)pc,&comm);
206:   if (!singular) {
207:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
208:     lrc[0] = PETSC_FALSE;
209:     for (i=0;i<n;i++) {
210:       if (PetscRealPart(vals[i]) > 2.) {
211:         lrc[0] = PETSC_TRUE;
212:         break;
213:       }
214:     }
215:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
216:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
217:     if (!lrc[1]) return(0);
218:   }

220:   /* Get Nedelec field */
221:   MatISSetUpSF(pc->pmat);
222:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
223:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
224:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
225:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
226:     ISGetLocalSize(nedfieldlocal,&ne);
227:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
228:     ne            = n;
229:     nedfieldlocal = NULL;
230:     global        = PETSC_TRUE;
231:   } else if (field == PETSC_DECIDE) {
232:     PetscInt rst,ren,*idx;

234:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
235:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
236:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
237:     for (i=rst;i<ren;i++) {
238:       PetscInt nc;

240:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
242:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:     }
244:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
245:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246:     PetscMalloc1(n,&idx);
247:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
248:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
249:   } else {
250:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
251:   }

253:   /* Sanity checks */
254:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
255:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
256:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);

258:   /* Just set primal dofs and return */
259:   if (setprimal) {
260:     IS       enedfieldlocal;
261:     PetscInt *eidxs;

263:     PetscMalloc1(ne,&eidxs);
264:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
265:     if (nedfieldlocal) {
266:       ISGetIndices(nedfieldlocal,&idxs);
267:       for (i=0,cum=0;i<ne;i++) {
268:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
269:           eidxs[cum++] = idxs[i];
270:         }
271:       }
272:       ISRestoreIndices(nedfieldlocal,&idxs);
273:     } else {
274:       for (i=0,cum=0;i<ne;i++) {
275:         if (PetscRealPart(vals[i]) > 2.) {
276:           eidxs[cum++] = i;
277:         }
278:       }
279:     }
280:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
281:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
282:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
283:     PetscFree(eidxs);
284:     ISDestroy(&nedfieldlocal);
285:     ISDestroy(&enedfieldlocal);
286:     return(0);
287:   }

289:   /* Compute some l2g maps */
290:   if (nedfieldlocal) {
291:     IS is;

293:     /* need to map from the local Nedelec field to local numbering */
294:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
295:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
296:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
297:     ISLocalToGlobalMappingCreateIS(is,&al2g);
298:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
299:     if (global) {
300:       PetscObjectReference((PetscObject)al2g);
301:       el2g = al2g;
302:     } else {
303:       IS gis;

305:       ISRenumber(is,NULL,NULL,&gis);
306:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
307:       ISDestroy(&gis);
308:     }
309:     ISDestroy(&is);
310:   } else {
311:     /* restore default */
312:     pcbddc->nedfield = -1;
313:     /* one ref for the destruction of al2g, one for el2g */
314:     PetscObjectReference((PetscObject)al2g);
315:     PetscObjectReference((PetscObject)al2g);
316:     el2g = al2g;
317:     fl2g = NULL;
318:   }

320:   /* Start communication to drop connections for interior edges (for cc analysis only) */
321:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
322:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
323:   if (nedfieldlocal) {
324:     ISGetIndices(nedfieldlocal,&idxs);
325:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
326:     ISRestoreIndices(nedfieldlocal,&idxs);
327:   } else {
328:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
329:   }
330:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

333:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
334:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
335:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
336:     if (global) {
337:       PetscInt rst;

339:       MatGetOwnershipRange(G,&rst,NULL);
340:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
341:         if (matis->sf_rootdata[i] < 2) {
342:           matis->sf_rootdata[cum++] = i + rst;
343:         }
344:       }
345:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
346:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
347:     } else {
348:       PetscInt *tbz;

350:       PetscMalloc1(ne,&tbz);
351:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
352:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353:       ISGetIndices(nedfieldlocal,&idxs);
354:       for (i=0,cum=0;i<ne;i++)
355:         if (matis->sf_leafdata[idxs[i]] == 1)
356:           tbz[cum++] = i;
357:       ISRestoreIndices(nedfieldlocal,&idxs);
358:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
359:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
360:       PetscFree(tbz);
361:     }
362:   } else { /* we need the entire G to infer the nullspace */
363:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
364:     G    = pcbddc->discretegradient;
365:   }

367:   /* Extract subdomain relevant rows of G */
368:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
369:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
370:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
371:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
372:   ISDestroy(&lned);
373:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
374:   MatDestroy(&lGall);
375:   MatISGetLocalMat(lGis,&lG);

377:   /* SF for nodal dofs communications */
378:   MatGetLocalSize(G,NULL,&Lv);
379:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
380:   PetscObjectReference((PetscObject)vl2g);
381:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
382:   PetscSFCreate(comm,&sfv);
383:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
384:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
385:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
386:   i    = singular ? 2 : 1;
387:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

389:   /* Destroy temporary G created in MATIS format and modified G */
390:   PetscObjectReference((PetscObject)lG);
391:   MatDestroy(&lGis);
392:   MatDestroy(&G);

394:   if (print) {
395:     PetscObjectSetName((PetscObject)lG,"initial_lG");
396:     MatView(lG,NULL);
397:   }

399:   /* Save lG for values insertion in change of basis */
400:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

402:   /* Analyze the edge-nodes connections (duplicate lG) */
403:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
404:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
405:   PetscBTCreate(nv,&btv);
406:   PetscBTCreate(ne,&bte);
407:   PetscBTCreate(ne,&btb);
408:   PetscBTCreate(ne,&btbd);
409:   PetscBTCreate(nv,&btvcand);
410:   /* need to import the boundary specification to ensure the
411:      proper detection of coarse edges' endpoints */
412:   if (pcbddc->DirichletBoundariesLocal) {
413:     IS is;

415:     if (fl2g) {
416:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
417:     } else {
418:       is = pcbddc->DirichletBoundariesLocal;
419:     }
420:     ISGetLocalSize(is,&cum);
421:     ISGetIndices(is,&idxs);
422:     for (i=0;i<cum;i++) {
423:       if (idxs[i] >= 0) {
424:         PetscBTSet(btb,idxs[i]);
425:         PetscBTSet(btbd,idxs[i]);
426:       }
427:     }
428:     ISRestoreIndices(is,&idxs);
429:     if (fl2g) {
430:       ISDestroy(&is);
431:     }
432:   }
433:   if (pcbddc->NeumannBoundariesLocal) {
434:     IS is;

436:     if (fl2g) {
437:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
438:     } else {
439:       is = pcbddc->NeumannBoundariesLocal;
440:     }
441:     ISGetLocalSize(is,&cum);
442:     ISGetIndices(is,&idxs);
443:     for (i=0;i<cum;i++) {
444:       if (idxs[i] >= 0) {
445:         PetscBTSet(btb,idxs[i]);
446:       }
447:     }
448:     ISRestoreIndices(is,&idxs);
449:     if (fl2g) {
450:       ISDestroy(&is);
451:     }
452:   }

454:   /* Count neighs per dof */
455:   PetscCalloc1(ne,&ecount);
456:   PetscMalloc1(ne,&eneighs);
457:   ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
458:   for (i=1,cum=0;i<n_neigh;i++) {
459:     cum += n_shared[i];
460:     for (j=0;j<n_shared[i];j++) {
461:       ecount[shared[i][j]]++;
462:     }
463:   }
464:   if (ne) {
465:     PetscMalloc1(cum,&eneighs[0]);
466:   }
467:   for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
468:   PetscMemzero(ecount,ne*sizeof(PetscInt));
469:   for (i=1;i<n_neigh;i++) {
470:     for (j=0;j<n_shared[i];j++) {
471:       PetscInt k = shared[i][j];
472:       eneighs[k][ecount[k]] = neigh[i];
473:       ecount[k]++;
474:     }
475:   }
476:   for (i=0;i<ne;i++) {
477:     PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
478:   }
479:   ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
480:   PetscCalloc1(nv,&vcount);
481:   PetscMalloc1(nv,&vneighs);
482:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
483:   for (i=1,cum=0;i<n_neigh;i++) {
484:     cum += n_shared[i];
485:     for (j=0;j<n_shared[i];j++) {
486:       vcount[shared[i][j]]++;
487:     }
488:   }
489:   if (nv) {
490:     PetscMalloc1(cum,&vneighs[0]);
491:   }
492:   for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
493:   PetscMemzero(vcount,nv*sizeof(PetscInt));
494:   for (i=1;i<n_neigh;i++) {
495:     for (j=0;j<n_shared[i];j++) {
496:       PetscInt k = shared[i][j];
497:       vneighs[k][vcount[k]] = neigh[i];
498:       vcount[k]++;
499:     }
500:   }
501:   for (i=0;i<nv;i++) {
502:     PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
503:   }
504:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

506:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
507:      for proper detection of coarse edges' endpoints */
508:   PetscBTCreate(ne,&btee);
509:   for (i=0;i<ne;i++) {
510:     if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
511:       PetscBTSet(btee,i);
512:     }
513:   }
514:   PetscMalloc1(ne,&marks);
515:   if (!conforming) {
516:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
517:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
518:   }
519:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
520:   MatSeqAIJGetArray(lGe,&vals);
521:   cum  = 0;
522:   for (i=0;i<ne;i++) {
523:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
524:     if (!PetscBTLookup(btee,i)) {
525:       marks[cum++] = i;
526:       continue;
527:     }
528:     /* set badly connected edge dofs as primal */
529:     if (!conforming) {
530:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
531:         marks[cum++] = i;
532:         PetscBTSet(bte,i);
533:         for (j=ii[i];j<ii[i+1];j++) {
534:           PetscBTSet(btv,jj[j]);
535:         }
536:       } else {
537:         /* every edge dofs should be connected trough a certain number of nodal dofs
538:            to other edge dofs belonging to coarse edges
539:            - at most 2 endpoints
540:            - order-1 interior nodal dofs
541:            - no undefined nodal dofs (nconn < order)
542:         */
543:         PetscInt ends = 0,ints = 0, undef = 0;
544:         for (j=ii[i];j<ii[i+1];j++) {
545:           PetscInt v = jj[j],k;
546:           PetscInt nconn = iit[v+1]-iit[v];
547:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
548:           if (nconn > order) ends++;
549:           else if (nconn == order) ints++;
550:           else undef++;
551:         }
552:         if (undef || ends > 2 || ints != order -1) {
553:           marks[cum++] = i;
554:           PetscBTSet(bte,i);
555:           for (j=ii[i];j<ii[i+1];j++) {
556:             PetscBTSet(btv,jj[j]);
557:           }
558:         }
559:       }
560:     }
561:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
562:     if (!order && ii[i+1] != ii[i]) {
563:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
564:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
565:     }
566:   }
567:   PetscBTDestroy(&btee);
568:   MatSeqAIJRestoreArray(lGe,&vals);
569:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
570:   if (!conforming) {
571:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
572:     MatDestroy(&lGt);
573:   }
574:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

576:   /* identify splitpoints and corner candidates */
577:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
578:   if (print) {
579:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
580:     MatView(lGe,NULL);
581:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
582:     MatView(lGt,NULL);
583:   }
584:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
585:   MatSeqAIJGetArray(lGt,&vals);
586:   for (i=0;i<nv;i++) {
587:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
588:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
589:     if (!order) { /* variable order */
590:       PetscReal vorder = 0.;

592:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
593:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
594:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
595:       ord  = 1;
596:     }
597: #if defined(PETSC_USE_DEBUG)
598:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
599: #endif
600:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
601:       if (PetscBTLookup(btbd,jj[j])) {
602:         bdir = PETSC_TRUE;
603:         break;
604:       }
605:       if (vc != ecount[jj[j]]) {
606:         sneighs = PETSC_FALSE;
607:       } else {
608:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
609:         for (k=0;k<vc;k++) {
610:           if (vn[k] != en[k]) {
611:             sneighs = PETSC_FALSE;
612:             break;
613:           }
614:         }
615:       }
616:     }
617:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
618:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
619:       PetscBTSet(btv,i);
620:     } else if (test == ord) {
621:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
622:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
623:         PetscBTSet(btv,i);
624:       } else {
625:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
626:         PetscBTSet(btvcand,i);
627:       }
628:     }
629:   }
630:   PetscFree(ecount);
631:   PetscFree(vcount);
632:   if (ne) {
633:     PetscFree(eneighs[0]);
634:   }
635:   if (nv) {
636:     PetscFree(vneighs[0]);
637:   }
638:   PetscFree(eneighs);
639:   PetscFree(vneighs);
640:   PetscBTDestroy(&btbd);

642:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
643:   if (order != 1) {
644:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
645:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
646:     for (i=0;i<nv;i++) {
647:       if (PetscBTLookup(btvcand,i)) {
648:         PetscBool found = PETSC_FALSE;
649:         for (j=ii[i];j<ii[i+1] && !found;j++) {
650:           PetscInt k,e = jj[j];
651:           if (PetscBTLookup(bte,e)) continue;
652:           for (k=iit[e];k<iit[e+1];k++) {
653:             PetscInt v = jjt[k];
654:             if (v != i && PetscBTLookup(btvcand,v)) {
655:               found = PETSC_TRUE;
656:               break;
657:             }
658:           }
659:         }
660:         if (!found) {
661:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d CLEARED\n",i);
662:           PetscBTClear(btvcand,i);
663:         } else {
664:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %d ACCEPTED\n",i);
665:         }
666:       }
667:     }
668:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
669:   }
670:   MatSeqAIJRestoreArray(lGt,&vals);
671:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
672:   MatDestroy(&lGe);

674:   /* Get the local G^T explicitly */
675:   MatDestroy(&lGt);
676:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
677:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

679:   /* Mark interior nodal dofs */
680:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
681:   PetscBTCreate(nv,&btvi);
682:   for (i=1;i<n_neigh;i++) {
683:     for (j=0;j<n_shared[i];j++) {
684:       PetscBTSet(btvi,shared[i][j]);
685:     }
686:   }
687:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

689:   /* communicate corners and splitpoints */
690:   PetscMalloc1(nv,&vmarks);
691:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
692:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
693:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

695:   if (print) {
696:     IS tbz;

698:     cum = 0;
699:     for (i=0;i<nv;i++)
700:       if (sfvleaves[i])
701:         vmarks[cum++] = i;

703:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
704:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
705:     ISView(tbz,NULL);
706:     ISDestroy(&tbz);
707:   }

709:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
710:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
711:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
712:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

714:   /* Zero rows of lGt corresponding to identified corners
715:      and interior nodal dofs */
716:   cum = 0;
717:   for (i=0;i<nv;i++) {
718:     if (sfvleaves[i]) {
719:       vmarks[cum++] = i;
720:       PetscBTSet(btv,i);
721:     }
722:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
723:   }
724:   PetscBTDestroy(&btvi);
725:   if (print) {
726:     IS tbz;

728:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
729:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
730:     ISView(tbz,NULL);
731:     ISDestroy(&tbz);
732:   }
733:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
734:   PetscFree(vmarks);
735:   PetscSFDestroy(&sfv);
736:   PetscFree2(sfvleaves,sfvroots);

738:   /* Recompute G */
739:   MatDestroy(&lG);
740:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
741:   if (print) {
742:     PetscObjectSetName((PetscObject)lG,"used_lG");
743:     MatView(lG,NULL);
744:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
745:     MatView(lGt,NULL);
746:   }

748:   /* Get primal dofs (if any) */
749:   cum = 0;
750:   for (i=0;i<ne;i++) {
751:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
752:   }
753:   if (fl2g) {
754:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
755:   }
756:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
757:   if (print) {
758:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
759:     ISView(primals,NULL);
760:   }
761:   PetscBTDestroy(&bte);
762:   /* TODO: what if the user passed in some of them ?  */
763:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
764:   ISDestroy(&primals);

766:   /* Compute edge connectivity */
767:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
768:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
769:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
770:   if (fl2g) {
771:     PetscBT   btf;
772:     PetscInt  *iia,*jja,*iiu,*jju;
773:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

775:     /* create CSR for all local dofs */
776:     PetscMalloc1(n+1,&iia);
777:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
778:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
779:       iiu = pcbddc->mat_graph->xadj;
780:       jju = pcbddc->mat_graph->adjncy;
781:     } else if (pcbddc->use_local_adj) {
782:       rest = PETSC_TRUE;
783:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
784:     } else {
785:       free   = PETSC_TRUE;
786:       PetscMalloc2(n+1,&iiu,n,&jju);
787:       iiu[0] = 0;
788:       for (i=0;i<n;i++) {
789:         iiu[i+1] = i+1;
790:         jju[i]   = -1;
791:       }
792:     }

794:     /* import sizes of CSR */
795:     iia[0] = 0;
796:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

798:     /* overwrite entries corresponding to the Nedelec field */
799:     PetscBTCreate(n,&btf);
800:     ISGetIndices(nedfieldlocal,&idxs);
801:     for (i=0;i<ne;i++) {
802:       PetscBTSet(btf,idxs[i]);
803:       iia[idxs[i]+1] = ii[i+1]-ii[i];
804:     }

806:     /* iia in CSR */
807:     for (i=0;i<n;i++) iia[i+1] += iia[i];

809:     /* jja in CSR */
810:     PetscMalloc1(iia[n],&jja);
811:     for (i=0;i<n;i++)
812:       if (!PetscBTLookup(btf,i))
813:         for (j=0;j<iiu[i+1]-iiu[i];j++)
814:           jja[iia[i]+j] = jju[iiu[i]+j];

816:     /* map edge dofs connectivity */
817:     if (jj) {
818:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
819:       for (i=0;i<ne;i++) {
820:         PetscInt e = idxs[i];
821:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
822:       }
823:     }
824:     ISRestoreIndices(nedfieldlocal,&idxs);
825:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
826:     if (rest) {
827:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
828:     }
829:     if (free) {
830:       PetscFree2(iiu,jju);
831:     }
832:     PetscBTDestroy(&btf);
833:   } else {
834:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
835:   }

837:   /* Analyze interface for edge dofs */
838:   PCBDDCAnalyzeInterface(pc);
839:   pcbddc->mat_graph->twodim = PETSC_FALSE;

841:   /* Get coarse edges in the edge space */
842:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
843:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

845:   if (fl2g) {
846:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
847:     PetscMalloc1(nee,&eedges);
848:     for (i=0;i<nee;i++) {
849:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
850:     }
851:   } else {
852:     eedges  = alleedges;
853:     primals = allprimals;
854:   }

856:   /* Mark fine edge dofs with their coarse edge id */
857:   PetscMemzero(marks,ne*sizeof(PetscInt));
858:   ISGetLocalSize(primals,&cum);
859:   ISGetIndices(primals,&idxs);
860:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
861:   ISRestoreIndices(primals,&idxs);
862:   if (print) {
863:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
864:     ISView(primals,NULL);
865:   }

867:   maxsize = 0;
868:   for (i=0;i<nee;i++) {
869:     PetscInt size,mark = i+1;

871:     ISGetLocalSize(eedges[i],&size);
872:     ISGetIndices(eedges[i],&idxs);
873:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
874:     ISRestoreIndices(eedges[i],&idxs);
875:     maxsize = PetscMax(maxsize,size);
876:   }

878:   /* Find coarse edge endpoints */
879:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
880:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
881:   for (i=0;i<nee;i++) {
882:     PetscInt mark = i+1,size;

884:     ISGetLocalSize(eedges[i],&size);
885:     if (!size && nedfieldlocal) continue;
886:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
887:     ISGetIndices(eedges[i],&idxs);
888:     if (print) {
889:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
890:       ISView(eedges[i],NULL);
891:     }
892:     for (j=0;j<size;j++) {
893:       PetscInt k, ee = idxs[j];
894:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %d\n",ee);
895:       for (k=ii[ee];k<ii[ee+1];k++) {
896:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %d\n",jj[k]);
897:         if (PetscBTLookup(btv,jj[k])) {
898:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %d\n",jj[k]);
899:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
900:           PetscInt  k2;
901:           PetscBool corner = PETSC_FALSE;
902:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
903:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
904:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
905:                if the edge dof lie on the natural part of the boundary */
906:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
907:               corner = PETSC_TRUE;
908:               break;
909:             }
910:           }
911:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
912:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %d\n",jj[k]);
913:             PetscBTSet(btv,jj[k]);
914:           } else {
915:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
916:           }
917:         }
918:       }
919:     }
920:     ISRestoreIndices(eedges[i],&idxs);
921:   }
922:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
923:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
924:   PetscBTDestroy(&btb);

926:   /* Reset marked primal dofs */
927:   ISGetLocalSize(primals,&cum);
928:   ISGetIndices(primals,&idxs);
929:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
930:   ISRestoreIndices(primals,&idxs);

932:   /* Now use the initial lG */
933:   MatDestroy(&lG);
934:   MatDestroy(&lGt);
935:   lG   = lGinit;
936:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

938:   /* Compute extended cols indices */
939:   PetscBTCreate(nv,&btvc);
940:   PetscBTCreate(nee,&bter);
941:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
942:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
943:   i   *= maxsize;
944:   PetscCalloc1(nee,&extcols);
945:   PetscMalloc2(i,&extrow,i,&gidxs);
946:   eerr = PETSC_FALSE;
947:   for (i=0;i<nee;i++) {
948:     PetscInt size,found = 0;

950:     cum  = 0;
951:     ISGetLocalSize(eedges[i],&size);
952:     if (!size && nedfieldlocal) continue;
953:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
954:     ISGetIndices(eedges[i],&idxs);
955:     PetscBTMemzero(nv,btvc);
956:     for (j=0;j<size;j++) {
957:       PetscInt k,ee = idxs[j];
958:       for (k=ii[ee];k<ii[ee+1];k++) {
959:         PetscInt vv = jj[k];
960:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
961:         else if (!PetscBTLookupSet(btvc,vv)) found++;
962:       }
963:     }
964:     ISRestoreIndices(eedges[i],&idxs);
965:     PetscSortRemoveDupsInt(&cum,extrow);
966:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
967:     PetscSortIntWithArray(cum,gidxs,extrow);
968:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
969:     /* it may happen that endpoints are not defined at this point
970:        if it is the case, mark this edge for a second pass */
971:     if (cum != size -1 || found != 2) {
972:       PetscBTSet(bter,i);
973:       if (print) {
974:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
975:         ISView(eedges[i],NULL);
976:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
977:         ISView(extcols[i],NULL);
978:       }
979:       eerr = PETSC_TRUE;
980:     }
981:   }
982:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
983:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
984:   if (done) {
985:     PetscInt *newprimals;

987:     PetscMalloc1(ne,&newprimals);
988:     ISGetLocalSize(primals,&cum);
989:     ISGetIndices(primals,&idxs);
990:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
991:     ISRestoreIndices(primals,&idxs);
992:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
993:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
994:     for (i=0;i<nee;i++) {
995:       PetscBool has_candidates = PETSC_FALSE;
996:       if (PetscBTLookup(bter,i)) {
997:         PetscInt size,mark = i+1;

999:         ISGetLocalSize(eedges[i],&size);
1000:         ISGetIndices(eedges[i],&idxs);
1001:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1002:         for (j=0;j<size;j++) {
1003:           PetscInt k,ee = idxs[j];
1004:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1005:           for (k=ii[ee];k<ii[ee+1];k++) {
1006:             /* set all candidates located on the edge as corners */
1007:             if (PetscBTLookup(btvcand,jj[k])) {
1008:               PetscInt k2,vv = jj[k];
1009:               has_candidates = PETSC_TRUE;
1010:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %d\n",vv);
1011:               PetscBTSet(btv,vv);
1012:               /* set all edge dofs connected to candidate as primals */
1013:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
1014:                 if (marks[jjt[k2]] == mark) {
1015:                   PetscInt k3,ee2 = jjt[k2];
1016:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %d\n",ee2);
1017:                   newprimals[cum++] = ee2;
1018:                   /* finally set the new corners */
1019:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1020:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %d\n",jj[k3]);
1021:                     PetscBTSet(btv,jj[k3]);
1022:                   }
1023:                 }
1024:               }
1025:             } else {
1026:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %d\n",jj[k]);
1027:             }
1028:           }
1029:         }
1030:         if (!has_candidates) { /* circular edge */
1031:           PetscInt k, ee = idxs[0],*tmarks;

1033:           PetscCalloc1(ne,&tmarks);
1034:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %d\n",i);
1035:           for (k=ii[ee];k<ii[ee+1];k++) {
1036:             PetscInt k2;
1037:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %d\n",jj[k]);
1038:             PetscBTSet(btv,jj[k]);
1039:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1040:           }
1041:           for (j=0;j<size;j++) {
1042:             if (tmarks[idxs[j]] > 1) {
1043:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %d\n",idxs[j]);
1044:               newprimals[cum++] = idxs[j];
1045:             }
1046:           }
1047:           PetscFree(tmarks);
1048:         }
1049:         ISRestoreIndices(eedges[i],&idxs);
1050:       }
1051:       ISDestroy(&extcols[i]);
1052:     }
1053:     PetscFree(extcols);
1054:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1055:     PetscSortRemoveDupsInt(&cum,newprimals);
1056:     if (fl2g) {
1057:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1058:       ISDestroy(&primals);
1059:       for (i=0;i<nee;i++) {
1060:         ISDestroy(&eedges[i]);
1061:       }
1062:       PetscFree(eedges);
1063:     }
1064:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1065:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1066:     PetscFree(newprimals);
1067:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1068:     ISDestroy(&primals);
1069:     PCBDDCAnalyzeInterface(pc);
1070:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1071:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1072:     if (fl2g) {
1073:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1074:       PetscMalloc1(nee,&eedges);
1075:       for (i=0;i<nee;i++) {
1076:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1077:       }
1078:     } else {
1079:       eedges  = alleedges;
1080:       primals = allprimals;
1081:     }
1082:     PetscCalloc1(nee,&extcols);

1084:     /* Mark again */
1085:     PetscMemzero(marks,ne*sizeof(PetscInt));
1086:     for (i=0;i<nee;i++) {
1087:       PetscInt size,mark = i+1;

1089:       ISGetLocalSize(eedges[i],&size);
1090:       ISGetIndices(eedges[i],&idxs);
1091:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1092:       ISRestoreIndices(eedges[i],&idxs);
1093:     }
1094:     if (print) {
1095:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1096:       ISView(primals,NULL);
1097:     }

1099:     /* Recompute extended cols */
1100:     eerr = PETSC_FALSE;
1101:     for (i=0;i<nee;i++) {
1102:       PetscInt size;

1104:       cum  = 0;
1105:       ISGetLocalSize(eedges[i],&size);
1106:       if (!size && nedfieldlocal) continue;
1107:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1108:       ISGetIndices(eedges[i],&idxs);
1109:       for (j=0;j<size;j++) {
1110:         PetscInt k,ee = idxs[j];
1111:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1112:       }
1113:       ISRestoreIndices(eedges[i],&idxs);
1114:       PetscSortRemoveDupsInt(&cum,extrow);
1115:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1116:       PetscSortIntWithArray(cum,gidxs,extrow);
1117:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1118:       if (cum != size -1) {
1119:         if (print) {
1120:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1121:           ISView(eedges[i],NULL);
1122:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1123:           ISView(extcols[i],NULL);
1124:         }
1125:         eerr = PETSC_TRUE;
1126:       }
1127:     }
1128:   }
1129:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1130:   PetscFree2(extrow,gidxs);
1131:   PetscBTDestroy(&bter);
1132:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1133:   /* an error should not occur at this point */
1134:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1136:   /* Check the number of endpoints */
1137:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1138:   PetscMalloc1(2*nee,&corners);
1139:   PetscMalloc1(nee,&cedges);
1140:   for (i=0;i<nee;i++) {
1141:     PetscInt size, found = 0, gc[2];

1143:     /* init with defaults */
1144:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1145:     ISGetLocalSize(eedges[i],&size);
1146:     if (!size && nedfieldlocal) continue;
1147:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1148:     ISGetIndices(eedges[i],&idxs);
1149:     PetscBTMemzero(nv,btvc);
1150:     for (j=0;j<size;j++) {
1151:       PetscInt k,ee = idxs[j];
1152:       for (k=ii[ee];k<ii[ee+1];k++) {
1153:         PetscInt vv = jj[k];
1154:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1155:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1156:           corners[i*2+found++] = vv;
1157:         }
1158:       }
1159:     }
1160:     if (found != 2) {
1161:       PetscInt e;
1162:       if (fl2g) {
1163:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1164:       } else {
1165:         e = idxs[0];
1166:       }
1167:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1168:     }

1170:     /* get primal dof index on this coarse edge */
1171:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1172:     if (gc[0] > gc[1]) {
1173:       PetscInt swap  = corners[2*i];
1174:       corners[2*i]   = corners[2*i+1];
1175:       corners[2*i+1] = swap;
1176:     }
1177:     cedges[i] = idxs[size-1];
1178:     ISRestoreIndices(eedges[i],&idxs);
1179:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1180:   }
1181:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1182:   PetscBTDestroy(&btvc);

1184: #if defined(PETSC_USE_DEBUG)
1185:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1186:      not interfere with neighbouring coarse edges */
1187:   PetscMalloc1(nee+1,&emarks);
1188:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1189:   for (i=0;i<nv;i++) {
1190:     PetscInt emax = 0,eemax = 0;

1192:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1193:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1194:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1195:     for (j=1;j<nee+1;j++) {
1196:       if (emax < emarks[j]) {
1197:         emax = emarks[j];
1198:         eemax = j;
1199:       }
1200:     }
1201:     /* not relevant for edges */
1202:     if (!eemax) continue;

1204:     for (j=ii[i];j<ii[i+1];j++) {
1205:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1206:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1207:       }
1208:     }
1209:   }
1210:   PetscFree(emarks);
1211:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1212: #endif

1214:   /* Compute extended rows indices for edge blocks of the change of basis */
1215:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1216:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1217:   extmem *= maxsize;
1218:   PetscMalloc1(extmem*nee,&extrow);
1219:   PetscMalloc1(nee,&extrows);
1220:   PetscCalloc1(nee,&extrowcum);
1221:   for (i=0;i<nv;i++) {
1222:     PetscInt mark = 0,size,start;

1224:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1225:     for (j=ii[i];j<ii[i+1];j++)
1226:       if (marks[jj[j]] && !mark)
1227:         mark = marks[jj[j]];

1229:     /* not relevant */
1230:     if (!mark) continue;

1232:     /* import extended row */
1233:     mark--;
1234:     start = mark*extmem+extrowcum[mark];
1235:     size = ii[i+1]-ii[i];
1236:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1237:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1238:     extrowcum[mark] += size;
1239:   }
1240:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1241:   MatDestroy(&lGt);
1242:   PetscFree(marks);

1244:   /* Compress extrows */
1245:   cum  = 0;
1246:   for (i=0;i<nee;i++) {
1247:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1248:     PetscSortRemoveDupsInt(&size,start);
1249:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1250:     cum  = PetscMax(cum,size);
1251:   }
1252:   PetscFree(extrowcum);
1253:   PetscBTDestroy(&btv);
1254:   PetscBTDestroy(&btvcand);

1256:   /* Workspace for lapack inner calls and VecSetValues */
1257:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1259:   /* Create change of basis matrix (preallocation can be improved) */
1260:   MatCreate(comm,&T);
1261:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1262:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1263:   MatSetType(T,MATAIJ);
1264:   MatSeqAIJSetPreallocation(T,10,NULL);
1265:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1266:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1267:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1268:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1269:   ISLocalToGlobalMappingDestroy(&al2g);

1271:   /* Defaults to identity */
1272:   MatCreateVecs(pc->pmat,&tvec,NULL);
1273:   VecSet(tvec,1.0);
1274:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1275:   VecDestroy(&tvec);

1277:   /* Create discrete gradient for the coarser level if needed */
1278:   MatDestroy(&pcbddc->nedcG);
1279:   ISDestroy(&pcbddc->nedclocal);
1280:   if (pcbddc->current_level < pcbddc->max_levels) {
1281:     ISLocalToGlobalMapping cel2g,cvl2g;
1282:     IS                     wis,gwis;
1283:     PetscInt               cnv,cne;

1285:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1286:     if (fl2g) {
1287:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1288:     } else {
1289:       PetscObjectReference((PetscObject)wis);
1290:       pcbddc->nedclocal = wis;
1291:     }
1292:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1293:     ISDestroy(&wis);
1294:     ISRenumber(gwis,NULL,&cne,&wis);
1295:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1296:     ISDestroy(&wis);
1297:     ISDestroy(&gwis);

1299:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1300:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1301:     ISDestroy(&wis);
1302:     ISRenumber(gwis,NULL,&cnv,&wis);
1303:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1304:     ISDestroy(&wis);
1305:     ISDestroy(&gwis);

1307:     MatCreate(comm,&pcbddc->nedcG);
1308:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1309:     MatSetType(pcbddc->nedcG,MATAIJ);
1310:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1311:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1312:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1313:     ISLocalToGlobalMappingDestroy(&cel2g);
1314:     ISLocalToGlobalMappingDestroy(&cvl2g);
1315:   }
1316:   ISLocalToGlobalMappingDestroy(&vl2g);

1318: #if defined(PRINT_GDET)
1319:   inc = 0;
1320:   lev = pcbddc->current_level;
1321: #endif

1323:   /* Insert values in the change of basis matrix */
1324:   for (i=0;i<nee;i++) {
1325:     Mat         Gins = NULL, GKins = NULL;
1326:     IS          cornersis = NULL;
1327:     PetscScalar cvals[2];

1329:     if (pcbddc->nedcG) {
1330:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1331:     }
1332:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1333:     if (Gins && GKins) {
1334:       PetscScalar    *data;
1335:       const PetscInt *rows,*cols;
1336:       PetscInt       nrh,nch,nrc,ncc;

1338:       ISGetIndices(eedges[i],&cols);
1339:       /* H1 */
1340:       ISGetIndices(extrows[i],&rows);
1341:       MatGetSize(Gins,&nrh,&nch);
1342:       MatDenseGetArray(Gins,&data);
1343:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1344:       MatDenseRestoreArray(Gins,&data);
1345:       ISRestoreIndices(extrows[i],&rows);
1346:       /* complement */
1347:       MatGetSize(GKins,&nrc,&ncc);
1348:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1349:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1350:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1351:       MatDenseGetArray(GKins,&data);
1352:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1353:       MatDenseRestoreArray(GKins,&data);

1355:       /* coarse discrete gradient */
1356:       if (pcbddc->nedcG) {
1357:         PetscInt cols[2];

1359:         cols[0] = 2*i;
1360:         cols[1] = 2*i+1;
1361:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1362:       }
1363:       ISRestoreIndices(eedges[i],&cols);
1364:     }
1365:     ISDestroy(&extrows[i]);
1366:     ISDestroy(&extcols[i]);
1367:     ISDestroy(&cornersis);
1368:     MatDestroy(&Gins);
1369:     MatDestroy(&GKins);
1370:   }
1371:   ISLocalToGlobalMappingDestroy(&el2g);

1373:   /* Start assembling */
1374:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1375:   if (pcbddc->nedcG) {
1376:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1377:   }

1379:   /* Free */
1380:   if (fl2g) {
1381:     ISDestroy(&primals);
1382:     for (i=0;i<nee;i++) {
1383:       ISDestroy(&eedges[i]);
1384:     }
1385:     PetscFree(eedges);
1386:   }

1388:   /* hack mat_graph with primal dofs on the coarse edges */
1389:   {
1390:     PCBDDCGraph graph   = pcbddc->mat_graph;
1391:     PetscInt    *oqueue = graph->queue;
1392:     PetscInt    *ocptr  = graph->cptr;
1393:     PetscInt    ncc,*idxs;

1395:     /* find first primal edge */
1396:     if (pcbddc->nedclocal) {
1397:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1398:     } else {
1399:       if (fl2g) {
1400:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1401:       }
1402:       idxs = cedges;
1403:     }
1404:     cum = 0;
1405:     while (cum < nee && cedges[cum] < 0) cum++;

1407:     /* adapt connected components */
1408:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1409:     graph->cptr[0] = 0;
1410:     for (i=0,ncc=0;i<graph->ncc;i++) {
1411:       PetscInt lc = ocptr[i+1]-ocptr[i];
1412:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1413:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1414:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1415:         ncc++;
1416:         lc--;
1417:         cum++;
1418:         while (cum < nee && cedges[cum] < 0) cum++;
1419:       }
1420:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1421:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1422:       ncc++;
1423:     }
1424:     graph->ncc = ncc;
1425:     if (pcbddc->nedclocal) {
1426:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1427:     }
1428:     PetscFree2(ocptr,oqueue);
1429:   }
1430:   ISLocalToGlobalMappingDestroy(&fl2g);
1431:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1432:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1433:   MatDestroy(&conn);

1435:   ISDestroy(&nedfieldlocal);
1436:   PetscFree(extrow);
1437:   PetscFree2(work,rwork);
1438:   PetscFree(corners);
1439:   PetscFree(cedges);
1440:   PetscFree(extrows);
1441:   PetscFree(extcols);
1442:   MatDestroy(&lG);

1444:   /* Complete assembling */
1445:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1446:   if (pcbddc->nedcG) {
1447:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1448: #if 0
1449:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1450:     MatView(pcbddc->nedcG,NULL);
1451: #endif
1452:   }

1454:   /* set change of basis */
1455:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1456:   MatDestroy(&T);

1458:   return(0);
1459: }

1461: /* the near-null space of BDDC carries information on quadrature weights,
1462:    and these can be collinear -> so cheat with MatNullSpaceCreate
1463:    and create a suitable set of basis vectors first.

   Builds a MatNullSpace (returned through nnsp) on top of the caller's vectors
   quad_vecs[0..nvecs-1]. Before calling MatNullSpaceCreate, a placeholder entry
   pattern is written into each vector; afterwards the same entries are set back
   to zero.

   Parameters:
     comm      - communicator handed to MatNullSpaceCreate
     has_const - forwarded to MatNullSpaceCreate; also selects the placeholder pattern
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors used as the basis; modified and then reset by this routine
     nnsp      - receives the created null space

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is true and the local
   ownership range of a vector holds fewer than 2*nvecs entries. */
1464: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1465: {
1467:   PetscInt       i;

        /* first pass: write the placeholder pattern into each vector */
1470:   for (i=0;i<nvecs;i++) {
1471:     PetscInt first,last;

1473:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
          /* the has_const pattern below touches global entries 2*i and 2*i+1 */
1474:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* only the process owning global entry i writes into vector i */
1475:     if (i>=first && i < last) {
1476:       PetscScalar *data;
1477:       VecGetArray(quad_vecs[i],&data);
1478:       if (!has_const) {
              /* vector i gets a single 1 at global entry i */
1479:         data[i-first] = 1.;
1480:       } else {
              /* the pair (1,-1)/sqrt(2) has unit 2-norm and its entries sum to zero */
1481:         data[2*i-first] = 1./PetscSqrtReal(2.);
1482:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1483:       }
1484:       VecRestoreArray(quad_vecs[i],&data);
1485:     }
1486:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1487:   }
1488:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
        /* second pass: zero exactly the entries written in the first pass.
           NOTE(review): the VecLockPop/VecLockPush pair suggests MatNullSpaceCreate
           leaves the vectors read-locked -- confirm against its implementation. */
1489:   for (i=0;i<nvecs;i++) { /* reset vectors */
1490:     PetscInt first,last;
1491:     VecLockPop(quad_vecs[i]);
1492:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1493:     if (i>=first && i < last) {
1494:       PetscScalar *data;
1495:       VecGetArray(quad_vecs[i],&data);
1496:       if (!has_const) {
1497:         data[i-first] = 0.;
1498:       } else {
1499:         data[2*i-first] = 0.;
1500:         data[2*i-first+1] = 0.;
1501:       }
1502:       VecRestoreArray(quad_vecs[i],&data);
1503:     }
1504:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
          /* re-apply the lock removed at the top of this iteration */
1505:     VecLockPush(quad_vecs[i]);
1506:   }
1507:   return(0);
1508: }

/* Builds a MatNullSpace (returned through nnsp) whose vectors hold, on the face
   and edge candidates of graph, the entries of a local vector obtained by
   applying the local part of divudotp (or its transpose) to a vector of ones.

   Parameters:
     A         - matrix used to create the vectors, to fetch the local-to-global
                 mapping and to provide the communicator
     divudotp  - matrix whose local matrix is extracted with MatISGetLocalMat
     transpose - if false, MatMultTranspose is applied and the first mapping of A
                 is used; if true, MatMult is applied and the second mapping is used
     vl2l      - optional index set; when given, the computed local vector is
                 scattered into a vector created from the local matrix of A
     graph     - source of the face/edge candidates and of the per-dof
                 count / neighbours_set arrays
     nnsp      - receives the null space created by PCBDDCNullSpaceCreate

   NOTE(review): ierr is declared but never used in the visible text; the listing
   appears to have its error-checking statements elided -- confirm against the
   original file. */
1510: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1511: {
1512:   Mat                    loc_divudotp;
1513:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1514:   ISLocalToGlobalMapping map;
1515:   IS                     *faces,*edges;
1516:   PetscScalar            *vals;
1517:   const PetscScalar      *array;
1518:   PetscInt               i,maxneighs,lmaxneighs,maxsize,nf,ne;
1519:   PetscMPIInt            rank;
1520:   PetscErrorCode         ierr;

1523:   PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
        /* local bound on the number of vectors needed: 2 in the two-dimensional case,
           otherwise 1 + the largest graph->count found on the first dof of any edge */
1524:   if (graph->twodim) {
1525:     lmaxneighs = 2;
1526:   } else {
1527:     lmaxneighs = 1;
1528:     for (i=0;i<ne;i++) {
1529:       const PetscInt *idxs;
1530:       ISGetIndices(edges[i],&idxs);
1531:       lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]);
1532:       ISRestoreIndices(edges[i],&idxs);
1533:     }
1534:     lmaxneighs++; /* graph count does not include self */
1535:   }
        /* every process must create the same number of vectors */
1536:   MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
        /* vals is a scratch buffer sized for the largest face or edge */
1537:   maxsize = 0;
1538:   for (i=0;i<ne;i++) {
1539:     PetscInt nn;
1540:     ISGetLocalSize(edges[i],&nn);
1541:     maxsize = PetscMax(maxsize,nn);
1542:   }
1543:   for (i=0;i<nf;i++) {
1544:     PetscInt nn;
1545:     ISGetLocalSize(faces[i],&nn);
1546:     maxsize = PetscMax(maxsize,nn);
1547:   }
1548:   PetscMalloc1(maxsize,&vals);
1549:   /* create vectors to hold quadrature weights */
1550:   MatCreateVecs(A,&quad_vec,NULL);
1551:   if (!transpose) {
1552:     MatGetLocalToGlobalMapping(A,&map,NULL);
1553:   } else {
1554:     MatGetLocalToGlobalMapping(A,NULL,&map);
1555:   }
1556:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1557:   VecDestroy(&quad_vec);
        /* the null space is created before the vectors receive their values */
1558:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* PCBDDCNullSpaceCreate ends with VecLockPush on each vector: pop it so the
           vectors can be filled below, and attach the mapping for VecSetValuesLocal */
1559:   for (i=0;i<maxneighs;i++) {
1560:     VecLockPop(quad_vecs[i]);
1561:     VecSetLocalToGlobalMapping(quad_vecs[i],map);
1562:   }

1564:   /* compute local quad vec */
1565:   MatISGetLocalMat(divudotp,&loc_divudotp);
1566:   if (!transpose) {
1567:     MatCreateVecs(loc_divudotp,&v,&p);
1568:   } else {
1569:     MatCreateVecs(loc_divudotp,&p,&v);
1570:   }
1571:   VecSet(p,1.);
1572:   if (!transpose) {
1573:     MatMultTranspose(loc_divudotp,p,v);
1574:   } else {
1575:     MatMult(loc_divudotp,p,v);
1576:   }
        /* with vl2l, vins is a separate vector filled by a scatter from v;
           without it, vins is just an alias of v */
1577:   if (vl2l) {
1578:     Mat        lA;
1579:     VecScatter sc;

1581:     MatISGetLocalMat(A,&lA);
1582:     MatCreateVecs(lA,&vins,NULL);
1583:     VecScatterCreate(v,vl2l,vins,NULL,&sc);
1584:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1585:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1586:     VecScatterDestroy(&sc);
1587:   } else {
1588:     vins = v;
1589:   }
1590:   VecGetArrayRead(vins,&array);
1591:   VecDestroy(&p);

1593:   /* insert in global quadrature vecs */
1594:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1595:   for (i=0;i<nf;i++) {
1596:     const PetscInt    *idxs;
1597:     PetscInt          idx,nn,j;

1599:     ISGetIndices(faces[i],&idxs);
1600:     ISGetLocalSize(faces[i],&nn);
1601:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* NOTE(review): idx = -(idx+1) recovers an insertion slot only if PetscFindInt
             did not find rank in neighbours_set (it then returns a negative value);
             this matches the "count does not include self" remark above -- confirm */
1602:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1603:     idx  = -(idx+1);
1604:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1605:     ISRestoreIndices(faces[i],&idxs);
1606:   }
        /* same procedure for the edges */
1607:   for (i=0;i<ne;i++) {
1608:     const PetscInt    *idxs;
1609:     PetscInt          idx,nn,j;

1611:     ISGetIndices(edges[i],&idxs);
1612:     ISGetLocalSize(edges[i],&nn);
1613:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1614:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1615:     idx  = -(idx+1);
1616:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1617:     ISRestoreIndices(edges[i],&idxs);
1618:   }
1619:   PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);
1620:   VecRestoreArrayRead(vins,&array);
        /* vins is destroyed separately only when it is not the alias of v */
1621:   if (vl2l) {
1622:     VecDestroy(&vins);
1623:   }
1624:   VecDestroy(&v);
1625:   PetscFree(vals);

1627:   /* assemble near null space */
1628:   for (i=0;i<maxneighs;i++) {
1629:     VecAssemblyBegin(quad_vecs[i]);
1630:   }
1631:   for (i=0;i<maxneighs;i++) {
1632:     VecAssemblyEnd(quad_vecs[i]);
          /* restore the lock popped right after PCBDDCNullSpaceCreate */
1633:     VecLockPush(quad_vecs[i]);
1634:   }
        /* NOTE(review): presumably *nnsp keeps its own references to the vectors,
           so releasing the local array here does not free them -- confirm */
1635:   VecDestroyVecs(maxneighs,&quad_vecs);
1636:   return(0);
1637: }


/*
   PCBDDCComputeLocalTopologyInfo - Converts the topology information stored in the BDDC
   data structure (dofs splitting, Dirichlet and Neumann boundaries, user-defined primal
   vertices) from global ordering to the local ordering of the subdomain.

   Input Parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Notes:
   The global index sets of the dofs splitting are destroyed after the conversion.
   With -pc_bddc_monolithic the whole dofs-splitting section is skipped and only the
   boundary / primal vertices information is processed.
   Returns 0.
*/
1640: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1641: {
1643:   Vec            local,global;
1644:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1645:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1646:   PetscBool      monolithic = PETSC_FALSE;

1649:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1650:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1651:   PetscOptionsEnd();
1652:   /* need to convert from global to local topology information and remove references to information in global ordering */
  /* work vectors handed to PCBDDCGlobalToLocal: one with the layout of the global matrix, one with that of the local matrix */
1653:   MatCreateVecs(pc->pmat,&global,NULL);
1654:   MatCreateVecs(matis->A,&local,NULL);
1655:   if (monolithic) goto boundary;

1657:   if (pcbddc->user_provided_isfordofs) {
    /* splitting given in global ordering: convert each IS and drop the global copy */
1658:     if (pcbddc->n_ISForDofs) {
1659:       PetscInt i;
1660:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1661:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1662:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1663:         ISDestroy(&pcbddc->ISForDofs[i]);
1664:       }
1665:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1666:       pcbddc->n_ISForDofs = 0;
1667:       PetscFree(pcbddc->ISForDofs);
1668:     }
1669:   } else {
1670:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1671:       DM dm;

      /* first choice: a DM attached to the PC or, failing that, to the matrix */
1673:       PCGetDM(pc, &dm);
1674:       if (!dm) {
1675:         MatGetDM(pc->pmat, &dm);
1676:       }
1677:       if (dm) {
1678:         IS      *fields;
1679:         PetscInt nf,i;
1680:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1681:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1682:         for (i=0;i<nf;i++) {
1683:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1684:           ISDestroy(&fields[i]);
1685:         }
1686:         PetscFree(fields);
1687:         pcbddc->n_ISForDofsLocal = nf;
1688:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1689:         PetscContainer   c;

1691:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1692:         if (c) {
1693:           MatISLocalFields lf;
1694:           PetscContainerGetPointer(c,(void**)&lf);
1695:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1696:         } else { /* fallback, create the default fields if bs > 1 */
1697:           PetscInt i, n = matis->A->rmap->n;
          /* i first receives the block size; once stored in n_ISForDofsLocal it is reused as loop counter */
1698:           MatGetBlockSize(pc->pmat,&i);
1699:           if (i > 1) {
1700:             pcbddc->n_ISForDofsLocal = i;
1701:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
            /* field i collects the local dofs i, i+bs, i+2*bs, ... (n/bs entries) */
1702:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1703:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1704:             }
1705:           }
1706:         }
1707:       }
1708:     } else {
      /* a local splitting is already present: pass every field through the consistency check (MPI_LAND) */
1709:       PetscInt i;
1710:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1711:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1712:       }
1713:     }
1714:   }

1716: boundary:
  /* for each kind of boundary: convert the global IS if no local one exists, otherwise check the local one
     (MPI_LAND for Dirichlet, MPI_LOR for Neumann) */
1717:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1718:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1719:   } else if (pcbddc->DirichletBoundariesLocal) {
1720:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1721:   }
1722:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1723:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1724:   } else if (pcbddc->NeumannBoundariesLocal) {
1725:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1726:   }
  /* user primal vertices are only converted; no consistency check is run on an existing local IS */
1727:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1728:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1729:   }
1730:   VecDestroy(&global);
1731:   VecDestroy(&local);

1733:   return(0);
1734: }

/*
   PCBDDCConsistencyCheckIS - Replaces a local index set with one obtained by combining,
   through the star forest of the MATIS matrix, the membership flags of its indices.

   Input Parameters:
+  pc  - the preconditioner (pc->pmat->data is read as Mat_IS)
.  mop - reduction used to combine the flags, either MPI_LAND or MPI_LOR
-  is  - on input, the index set in local numbering

   Output Parameter:
.  is  - the input IS is destroyed and replaced by a new IS, created on the same
         communicator, listing the local indices whose combined flag is true

   Notes:
   Errors out with PETSC_ERR_SUP for any other reduction.
   Indices outside [0,n), n being the number of local rows of matis->A, are ignored.
*/
1736: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1737: {
1738:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1739:   PetscErrorCode  ierr;
1740:   IS              nis;
1741:   const PetscInt  *idxs;
1742:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1743:   PetscBool       *ld;

1746:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1747:   MatISSetUpSF(pc->pmat);
  /* root data starts from the neutral element of the reduction: true for AND, false for OR */
1748:   if (mop == MPI_LAND) {
1749:     /* init rootdata with true */
1750:     ld   = (PetscBool*) matis->sf_rootdata;
1751:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1752:   } else {
1753:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1754:   }
  /* leaf data: flag the local dofs listed in the input IS */
1755:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1756:   ISGetLocalSize(*is,&nd);
1757:   ISGetIndices(*is,&idxs);
1758:   ld   = (PetscBool*) matis->sf_leafdata;
1759:   for (i=0;i<nd;i++)
1760:     if (-1 < idxs[i] && idxs[i] < n)
1761:       ld[idxs[i]] = PETSC_TRUE;
1762:   ISRestoreIndices(*is,&idxs);
  /* combine the flags on the roots, then send the result back to the leaves (ld still points to sf_leafdata) */
1763:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1764:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1765:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1766:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
  /* NOTE(review): the nd-sized buffer for MPI_LAND presumably relies on the AND result being a subset of the input - confirm */
1767:   if (mop == MPI_LAND) {
1768:     PetscMalloc1(nd,&nidxs);
1769:   } else {
1770:     PetscMalloc1(n,&nidxs);
1771:   }
1772:   for (i=0,nnd=0;i<n;i++)
1773:     if (ld[i])
1774:       nidxs[nnd++] = i;
  /* nidxs is handed over to the new IS (PETSC_OWN_POINTER), hence it is not freed here */
1775:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1776:   ISDestroy(is);
1777:   *is  = nis;
1778:   return(0);
1779: }

/*
   PCBDDCBenignRemoveInterior - Computes z from r by scattering r to the "D" dofs of the
   subdomain (pcis->global_to_D), solving there with pcbddc->ksp_D, and scattering the
   solution back into a zeroed z.

   Input Parameters:
+  pc - the preconditioner (pc->data is read both as PC_IS and PC_BDDC)
-  r  - the input vector

   Output Parameter:
.  z  - the result; left untouched if pcbddc->benign_have_null is false

   Notes:
   When a change of basis matrix is present, its transpose is applied to r beforehand and
   the matrix itself is applied to the result.
*/
1781: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1782: {
1783:   PC_IS             *pcis = (PC_IS*)(pc->data);
1784:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1785:   PetscErrorCode    ierr;

1788:   if (!pcbddc->benign_have_null) {
1789:     return(0);
1790:   }
1791:   if (pcbddc->ChangeOfBasisMatrix) {
1792:     Vec swap;

    /* work_change receives the transformed residual; then the two handles are exchanged so that
       the local r refers to the transformed vector and work_change temporarily holds the caller's r */
1794:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1795:     swap = pcbddc->work_change;
1796:     pcbddc->work_change = r;
1797:     r = swap;
1798:   }
1799:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1800:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1801:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
  /* entries of z not reached by the reverse scatter stay zero */
1802:   VecSet(z,0.);
1803:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1804:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1805:   if (pcbddc->ChangeOfBasisMatrix) {
    /* undo the exchange done above (r is the original work vector here), then z <- ChangeOfBasisMatrix * z */
1806:     pcbddc->work_change = r;
1807:     VecCopy(z,pcbddc->work_change);
1808:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1809:   }
1810:   return(0);
1811: }

/*
   PCBDDCBenignMatMult_Private_Private - Common kernel for the MatMult and MatMultTranspose
   operations of the shell matrices created by PCBDDCBenignShellMat.

   Input Parameters:
+  A         - the shell matrix; its context is a PCBDDCBenignMatMult_ctx
.  x         - the input vector
-  transpose - if true, ctx->A is applied transposed and the roles of the left and right
               modifications stored in the context are exchanged

   Output Parameter:
.  y - the result

   Notes:
   The "right" modification acts on a copy of x kept in ctx->work, which is temporarily
   placed as the array of x; the "left" modification acts on y in place.
   In both, the last index of each subset in ctx->benign_zerodiag_subs is treated
   differently from the other nz-1 indices.
*/
1813: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1814: {
1815:   PCBDDCBenignMatMult_ctx ctx;
1816:   PetscErrorCode          ierr;
1817:   PetscBool               apply_right,apply_left,reset_x;

1820:   MatShellGetContext(A,&ctx);
1821:   if (transpose) {
1822:     apply_right = ctx->apply_left;
1823:     apply_left = ctx->apply_right;
1824:   } else {
1825:     apply_right = ctx->apply_right;
1826:     apply_left = ctx->apply_left;
1827:   }
1828:   reset_x = PETSC_FALSE;
1829:   if (apply_right) {
1830:     const PetscScalar *ax;
1831:     PetscInt          nl,i;

    /* work on a private copy of the input so that the entries of x are not modified */
1833:     VecGetLocalSize(x,&nl);
1834:     VecGetArrayRead(x,&ax);
1835:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1836:     VecRestoreArrayRead(x,&ax);
1837:     for (i=0;i<ctx->benign_n;i++) {
1838:       PetscScalar    sum,val;
1839:       const PetscInt *idxs;
1840:       PetscInt       nz,j;
1841:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1842:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
      /* sum accumulates the first nz-1 entries (values before the update);
         with apply_p0 each of them is also incremented by the last entry */
1843:       sum = 0.;
1844:       if (ctx->apply_p0) {
1845:         val = ctx->work[idxs[nz-1]];
1846:         for (j=0;j<nz-1;j++) {
1847:           sum += ctx->work[idxs[j]];
1848:           ctx->work[idxs[j]] += val;
1849:         }
1850:       } else {
1851:         for (j=0;j<nz-1;j++) {
1852:           sum += ctx->work[idxs[j]];
1853:         }
1854:       }
1855:       ctx->work[idxs[nz-1]] -= sum;
1856:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1857:     }
    /* x temporarily uses the modified copy as its array; reverted by VecResetArray after the product */
1858:     VecPlaceArray(x,ctx->work);
1859:     reset_x = PETSC_TRUE;
1860:   }
1861:   if (transpose) {
1862:     MatMultTranspose(ctx->A,x,y);
1863:   } else {
1864:     MatMult(ctx->A,x,y);
1865:   }
1866:   if (reset_x) {
1867:     VecResetArray(x);
1868:   }
1869:   if (apply_left) {
1870:     PetscScalar *ay;
1871:     PetscInt    i;

1873:     VecGetArray(y,&ay);
1874:     for (i=0;i<ctx->benign_n;i++) {
1875:       PetscScalar    sum,val;
1876:       const PetscInt *idxs;
1877:       PetscInt       nz,j;
1878:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1879:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
      /* the last entry of the subset is subtracted from the other nz-1 entries; the last entry itself
         is either incremented by their (pre-update) sum (apply_p0) or zeroed */
1880:       val = -ay[idxs[nz-1]];
1881:       if (ctx->apply_p0) {
1882:         sum = 0.;
1883:         for (j=0;j<nz-1;j++) {
1884:           sum += ay[idxs[j]];
1885:           ay[idxs[j]] += val;
1886:         }
1887:         ay[idxs[nz-1]] += sum;
1888:       } else {
1889:         for (j=0;j<nz-1;j++) {
1890:           ay[idxs[j]] += val;
1891:         }
1892:         ay[idxs[nz-1]] = 0.;
1893:       }
1894:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1895:     }
1896:     VecRestoreArray(y,&ay);
1897:   }
1898:   return(0);
1899: }

/* MATOP_MULT_TRANSPOSE callback of the benign shell matrix: forwards to the common kernel with transpose = PETSC_TRUE */
1901: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1902: {

1906:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
1907:   return(0);
1908: }

/* MATOP_MULT callback of the benign shell matrix: forwards to the common kernel with transpose = PETSC_FALSE */
1910: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1911: {

1915:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
1916:   return(0);
1917: }

/*
   PCBDDCBenignShellMat - Installs, or removes, the shell matrices that replace pcis->A_IB
   and pcis->A_BI.

   Input Parameters:
+  pc      - the preconditioner (pc->data is read both as PC_IS and PC_BDDC)
-  restore - if false, wrap the current A_IB in a MATSHELL and set A_BI to its transpose;
             if true, put the original matrices back and free the shell data

   Notes:
   With restore false, nothing is done if there is no benign change, benign_n is zero or
   the change is applied explicitly; it is an error (PETSC_ERR_PLIB) to call it twice
   without restoring in between.
   With restore true, nothing is done if no original matrix has been saved.
   The original A_IB is kept in the shell context, the original A_BI in
   pcbddc->benign_original_mat.
*/
1919: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1920: {
1921:   PC_IS                   *pcis = (PC_IS*)pc->data;
1922:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
1923:   PCBDDCBenignMatMult_ctx ctx;
1924:   PetscErrorCode          ierr;

1927:   if (!restore) {
1928:     Mat                A_IB,A_BI;
1929:     PetscScalar        *work;
1930:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

1932:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
1933:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
    /* scratch array used by the shell multiplication; owned by the context and freed on restore */
1934:     PetscMalloc1(pcis->n,&work);
    /* sequential shell of size (n - n_B) x n_B */
1935:     MatCreate(PETSC_COMM_SELF,&A_IB);
1936:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
1937:     MatSetType(A_IB,MATSHELL);
1938:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
1939:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1940:     PetscNew(&ctx);
1941:     MatShellSetContext(A_IB,ctx);
1942:     ctx->apply_left = PETSC_TRUE;
1943:     ctx->apply_right = PETSC_FALSE;
1944:     ctx->apply_p0 = PETSC_FALSE;
1945:     ctx->benign_n = pcbddc->benign_n;
1946:     if (reuse) {
      /* borrow the subsets stored with the reused solvers; they must not be freed on restore */
1947:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1948:       ctx->free = PETSC_FALSE;
1949:     } else { /* TODO: could be optimized for successive solves */
1950:       ISLocalToGlobalMapping N_to_D;
1951:       PetscInt               i;

      /* renumber the zero-diagonal subsets through the mapping built from is_I_local,
         dropping the indices that the mapping does not contain */
1953:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
1954:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
1955:       for (i=0;i<pcbddc->benign_n;i++) {
1956:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
1957:       }
1958:       ISLocalToGlobalMappingDestroy(&N_to_D);
1959:       ctx->free = PETSC_TRUE;
1960:     }
    /* the context keeps the original A_IB, which the shell applies internally */
1961:     ctx->A = pcis->A_IB;
1962:     ctx->work = work;
1963:     MatSetUp(A_IB);
1964:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
1965:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
1966:     pcis->A_IB = A_IB;

1968:     /* A_BI as A_IB^T */
1969:     MatCreateTranspose(A_IB,&A_BI);
1970:     pcbddc->benign_original_mat = pcis->A_BI;
1971:     pcis->A_BI = A_BI;
1972:   } else {
1973:     if (!pcbddc->benign_original_mat) {
1974:       return(0);
1975:     }
    /* fetch the context before destroying the shell; the context itself is freed explicitly below */
1976:     MatShellGetContext(pcis->A_IB,&ctx);
1977:     MatDestroy(&pcis->A_IB);
1978:     pcis->A_IB = ctx->A;
1979:     ctx->A = NULL;
1980:     MatDestroy(&pcis->A_BI);
1981:     pcis->A_BI = pcbddc->benign_original_mat;
1982:     pcbddc->benign_original_mat = NULL;
1983:     if (ctx->free) {
1984:       PetscInt i;
1985:       for (i=0;i<ctx->benign_n;i++) {
1986:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
1987:       }
1988:       PetscFree(ctx->benign_zerodiag_subs);
1989:     }
1990:     PetscFree(ctx->work);
1991:     PetscFree(ctx);
1992:   }
1993:   return(0);
1994: }

1996: /* used just in bddc debug mode */
/*
   PCBDDCBenignProject - Builds An = P^T A P, with A the local MATIS matrix and
   P = pcbddc->benign_change, zeroes the rows and columns listed in
   pcbddc->benign_p0_lidx (placing 1.0 on their diagonal), and returns An or a submatrix.

   Input Parameters:
+  pc  - the preconditioner
.  is1 - rows of the submatrix to extract, or NULL to get the whole matrix
-  is2 - columns of the submatrix to extract; not used when is1 is NULL

   Output Parameter:
.  B - the new matrix
*/
1997: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
1998: {
1999:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2000:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2001:   Mat            An;

2005:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2006:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2007:   if (is1) {
    /* only the extracted block is returned; the full product is released */
2008:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2009:     MatDestroy(&An);
2010:   } else {
2011:     *B = An;
2012:   }
2013:   return(0);
2014: }

2016: /* TODO: add reuse flag */
/*
   MatSeqAIJCompress - Creates a new SeqAIJ matrix holding the entries of A whose
   magnitude is larger than PETSC_SMALL; entries whose column index equals the row index
   are always kept.

   Input Parameter:
.  A - the matrix, accessed through MatGetRowIJ and MatSeqAIJGetArray

   Output Parameter:
.  B - the compressed matrix; it takes over the CSR arrays allocated here
*/
2017: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2018: {
2019:   Mat            Bt;
2020:   PetscScalar    *a,*bdata;
2021:   const PetscInt *ii,*ij;
2022:   PetscInt       m,n,i,nnz,*bii,*bij;
2023:   PetscBool      flg_row;

  /* n = number of rows, m = number of columns (n is set again by MatGetRowIJ) */
2027:   MatGetSize(A,&n,&m);
2028:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2029:   MatSeqAIJGetArray(A,&a);
  /* upper bound on the kept entries: the large ones plus room for one retained small diagonal entry per row */
2030:   nnz = n;
2031:   for (i=0;i<ii[n];i++) {
2032:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2033:   }
2034:   PetscMalloc1(n+1,&bii);
2035:   PetscMalloc1(nnz,&bij);
2036:   PetscMalloc1(nnz,&bdata);
  /* second pass: copy the retained entries row by row, building the new row pointers */
2037:   nnz = 0;
2038:   bii[0] = 0;
2039:   for (i=0;i<n;i++) {
2040:     PetscInt j;
2041:     for (j=ii[i];j<ii[i+1];j++) {
2042:       PetscScalar entry = a[j];
2043:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
2044:         bij[nnz] = ij[j];
2045:         bdata[nnz] = entry;
2046:         nnz++;
2047:       }
2048:     }
2049:     bii[i+1] = nnz;
2050:   }
2051:   MatSeqAIJRestoreArray(A,&a);
2052:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2053:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2054:   {
    /* flag the arrays as owned by Bt; NOTE(review): presumably so that they are freed when Bt is destroyed - confirm */
2055:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2056:     b->free_a = PETSC_TRUE;
2057:     b->free_ij = PETSC_TRUE;
2058:   }
2059:   *B = Bt;
2060:   return(0);
2061: }

/*
   PCBDDCDetectDisconnectedComponents - Computes the connected components of the local
   subdomain using a PCBDDCGraph.

   Input Parameter:
.  pc - the preconditioner

   Output Parameters (each may be NULL):
+  ncc     - number of connected components
.  cc      - array of ncc index sets (on PETSC_COMM_SELF), one per component
-  primalv - DMPLEX case only: dofs reached from a component after having already been
             assigned to a previous one ("cross-vertex"); NULL otherwise

   Notes:
   If a DMPLEX is attached to the PC (or to its matrix), the graph is the cell adjacency
   graph of the mesh and the components are then expanded to dofs through the subdomain
   section. Otherwise the graph is taken from the row structure of the local MATIS matrix.
   The local variable filter is fixed to PETSC_FALSE, so the filtering branches are
   currently inactive.
   With an empty local matrix, the routine reports zero components, sets the requested
   outputs accordingly and releases the graph before returning.
*/
2063: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2064: {
2065:   Mat                    B = NULL;
2066:   DM                     dm;
2067:   IS                     is_dummy,*cc_n;
2068:   ISLocalToGlobalMapping l2gmap_dummy;
2069:   PCBDDCGraph            graph;
2070:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2071:   PetscInt               i,n;
2072:   PetscInt               *xadj,*adjncy;
2073:   PetscBool              isplex = PETSC_FALSE;
2074:   PetscErrorCode         ierr;

2077:   PCBDDCGraphCreate(&graph);
2078:   PCGetDM(pc,&dm);
2079:   if (!dm) {
2080:     MatGetDM(pc->pmat,&dm);
2081:   }
2082:   if (dm) {
2083:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2084:   }
2085:   if (isplex) { /* this code has been modified from plexpartition.c */
2086:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2087:     PetscInt      *adj = NULL;
2088:     IS             cellNumbering;
2089:     const PetscInt *cellNum;
2090:     PetscBool      useCone, useClosure;
2091:     PetscSection   section;
2092:     PetscSegBuffer adjBuffer;
2093:     PetscSF        sfPoint;

2097:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2098:     DMGetPointSF(dm, &sfPoint);
2099:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2100:     /* Build adjacency graph via a section/segbuffer */
2101:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2102:     PetscSectionSetChart(section, pStart, pEnd);
2103:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2104:     /* Always use FVM adjacency to create partitioner graph */
    /* the user's adjacency settings are saved here and put back after the loop */
2105:     DMPlexGetAdjacencyUseCone(dm, &useCone);
2106:     DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2107:     DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2108:     DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2109:     DMPlexGetCellNumbering(dm, &cellNumbering);
2110:     ISGetIndices(cellNumbering, &cellNum);
2111:     for (n = 0, p = pStart; p < pEnd; p++) {
2112:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2113:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2114:       adjSize = PETSC_DETERMINE;
2115:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2116:       for (a = 0; a < adjSize; ++a) {
2117:         const PetscInt point = adj[a];
        /* keep only the adjacent points that are cells */
2118:         if (pStart <= point && point < pEnd) {
2119:           PetscInt *PETSC_RESTRICT pBuf;
2120:           PetscSectionAddDof(section, p, 1);
2121:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2122:           *pBuf = point;
2123:         }
2124:       }
2125:       n++;
2126:     }
2127:     DMPlexSetAdjacencyUseCone(dm, useCone);
2128:     DMPlexSetAdjacencyUseClosure(dm, useClosure);
2129:     /* Derive CSR graph from section/segbuffer */
2130:     PetscSectionSetUp(section);
2131:     PetscSectionGetStorageSize(section, &size);
2132:     PetscMalloc1(n+1, &xadj);
2133:     for (idx = 0, p = pStart; p < pEnd; p++) {
2134:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2135:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2136:     }
    /* cellNum is not used past this point: give the indices back to the IS */
    ISRestoreIndices(cellNumbering, &cellNum);
2137:     xadj[n] = size;
2138:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2139:     /* Clean up */
2140:     PetscSegBufferDestroy(&adjBuffer);
2141:     PetscSectionDestroy(&section);
2142:     PetscFree(adj);
2143:     graph->xadj = xadj;
2144:     graph->adjncy = adjncy;
2145:   } else {
2146:     Mat       A;
2147:     PetscBool filter = PETSC_FALSE, isseqaij, flg_row;

2149:     MatISGetLocalMat(pc->pmat,&A);
2150:     if (!A->rmap->N || !A->cmap->N) {
      /* empty subdomain: no components. The outputs are optional, and the graph created above must be released */
2151:       if (ncc) *ncc = 0;
2152:       if (cc) *cc = NULL;
      if (primalv) *primalv = NULL;
      PCBDDCGraphDestroy(&graph);
2153:       return(0);
2154:     }
2155:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2156:     if (!isseqaij && filter) {
2157:       PetscBool isseqdense;

2159:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2160:       if (!isseqdense) {
2161:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2162:       } else { /* TODO: rectangular case and LDA */
2163:         PetscScalar *array;
2164:         PetscReal   chop=1.e-6;

        /* zero the off-diagonal entries that are small relative to the two corresponding diagonal entries */
2166:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2167:         MatDenseGetArray(B,&array);
2168:         MatGetSize(B,&n,NULL);
2169:         for (i=0;i<n;i++) {
2170:           PetscInt j;
2171:           for (j=i+1;j<n;j++) {
2172:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2173:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2174:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2175:           }
2176:         }
2177:         MatDenseRestoreArray(B,&array);
2178:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2179:       }
2180:     } else {
      /* use the local matrix directly; the extra reference is dropped by MatDestroy(&B) in the clean up */
2181:       PetscObjectReference((PetscObject)A);
2182:       B = A;
2183:     }
2184:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2186:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2187:     if (filter) {
2188:       PetscScalar *data;
2189:       PetscInt    j,cum;

2191:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2192:       MatSeqAIJGetArray(B,&data);
2193:       cum = 0;
2194:       for (i=0;i<n;i++) {
2195:         PetscInt t;

        /* xadj_filtered[i] is first used as a counter of the kept entries of row i, then overwritten with the row offset */
2197:         for (j=xadj[i];j<xadj[i+1];j++) {
2198:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2199:             continue;
2200:           }
2201:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2202:         }
2203:         t = xadj_filtered[i];
2204:         xadj_filtered[i] = cum;
2205:         cum += t;
2206:       }
2207:       MatSeqAIJRestoreArray(B,&data);
2208:       graph->xadj = xadj_filtered;
2209:       graph->adjncy = adjncy_filtered;
2210:     } else {
2211:       graph->xadj = xadj;
2212:       graph->adjncy = adjncy;
2213:     }
2214:   }
2215:   /* compute local connected components using PCBDDCGraph */
2216:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2217:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2218:   ISDestroy(&is_dummy);
2219:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2220:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2221:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2222:   PCBDDCGraphComputeConnectedComponents(graph);

2224:   /* partial clean up */
2225:   PetscFree2(xadj_filtered,adjncy_filtered);
2226:   if (B) {
2227:     PetscBool flg_row;
2228:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2229:     MatDestroy(&B);
2230:   }
2231:   if (isplex) {
2232:     PetscFree(xadj);
2233:     PetscFree(adjncy);
2234:   }

2236:   /* get back data */
2237:   if (isplex) {
2238:     if (ncc) *ncc = graph->ncc;
2239:     if (cc || primalv) {
2240:       Mat          A;
2241:       PetscBT      btv,btvt;
2242:       PetscSection subSection;
2243:       PetscInt     *ids,cum,cump,*cids,*pids;

      /* ids: dofs grouped by component (cids holds the offsets); pids: cross-vertex dofs;
         btv: dofs already assigned to a previous component; btvt: dofs already visited in the current one */
2245:       DMPlexGetSubdomainSection(dm,&subSection);
2246:       MatISGetLocalMat(pc->pmat,&A);
2247:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2248:       PetscBTCreate(A->rmap->n,&btv);
2249:       PetscBTCreate(A->rmap->n,&btvt);

2251:       cids[0] = 0;
2252:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2253:         PetscInt j;

2255:         PetscBTMemzero(A->rmap->n,btvt);
2256:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2257:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2259:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
          /* closure is read with stride 2: only the even entries are used as points */
2260:           for (k = 0; k < 2*size; k += 2) {
2261:             PetscInt s, p = closure[k], off, dof, cdof;

2263:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2264:             PetscSectionGetOffset(subSection,p,&off);
2265:             PetscSectionGetDof(subSection,p,&dof);
2266:             for (s = 0; s < dof-cdof; s++) {
2267:               if (PetscBTLookupSet(btvt,off+s)) continue;
2268:               if (!PetscBTLookup(btv,off+s)) {
2269:                 ids[cum++] = off+s;
2270:               } else { /* cross-vertex */
2271:                 pids[cump++] = off+s;
2272:               }
2273:             }
2274:           }
2275:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2276:         }
2277:         cids[i+1] = cum;
2278:         /* mark dofs as already assigned */
2279:         for (j = cids[i]; j < cids[i+1]; j++) {
2280:           PetscBTSet(btv,ids[j]);
2281:         }
2282:       }
2283:       if (cc) {
2284:         PetscMalloc1(graph->ncc,&cc_n);
2285:         for (i = 0; i < graph->ncc; i++) {
2286:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2287:         }
2288:         *cc = cc_n;
2289:       }
2290:       if (primalv) {
2291:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2292:       }
2293:       PetscFree3(ids,cids,pids);
2294:       PetscBTDestroy(&btv);
2295:       PetscBTDestroy(&btvt);
2296:     }
2297:   } else {
2298:     if (ncc) *ncc = graph->ncc;
2299:     if (cc) {
2300:       PetscMalloc1(graph->ncc,&cc_n);
2301:       for (i=0;i<graph->ncc;i++) {
2302:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2303:       }
2304:       *cc = cc_n;
2305:     }
2306:     if (primalv) *primalv = NULL;
2307:   }
2308:   /* clean up graph */
  /* the CSR arrays have already been released above: clear the references held by the graph before destroying it */
2309:   graph->xadj = 0;
2310:   graph->adjncy = 0;
2311:   PCBDDCGraphDestroy(&graph);
2312:   return(0);
2313: }

/*
   PCBDDCBenignCheck - Sanity checks for the benign trick.

   Input Parameters:
+  pc       - the preconditioner (pc->data is read both as PC_BDDC and PC_IS)
-  zerodiag - local indices of the zero-diagonal (pressure) dofs, or NULL to skip the
              checks on the local operator

   Notes:
   When zerodiag is given, it checks that
   - b(v_I,p_0) is numerically zero, with p_0 equal to one on zerodiag and v_I a random
     vector zeroed on zerodiag, on the interface dofs and on the Dirichlet dofs;
   - no index of zerodiag belongs to pcis->is_B_local.
   It always checks that PCBDDCBenignGetOrSetP0 gives back the values it has stored.
   Any failed check raises an error.
   Error messages use %D for PetscInt and cast real values to double, so that the
   arguments match the format specifiers.
   NOTE(review): the error exits do not release the temporaries allocated here.
*/
2315: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2316: {
2317:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2318:   PC_IS*         pcis = (PC_IS*)(pc->data);
2319:   IS             dirIS = NULL;
2320:   PetscInt       i;

2324:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2325:   if (zerodiag) {
2326:     Mat            A;
2327:     Vec            vec3_N;
2328:     PetscScalar    *vals;
2329:     const PetscInt *idxs;
2330:     PetscInt       nz,*count;

2332:     /* p0 */
2333:     VecSet(pcis->vec1_N,0.);
    /* vals is a scratch buffer of pcis->n entries, refilled before every VecSetValues below */
2334:     PetscMalloc1(pcis->n,&vals);
2335:     ISGetLocalSize(zerodiag,&nz);
2336:     ISGetIndices(zerodiag,&idxs);
2337:     for (i=0;i<nz;i++) vals[i] = 1.;
2338:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2339:     VecAssemblyBegin(pcis->vec1_N);
2340:     VecAssemblyEnd(pcis->vec1_N);
2341:     /* v_I */
2342:     VecSetRandom(pcis->vec2_N,NULL);
2343:     for (i=0;i<nz;i++) vals[i] = 0.;
2344:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2345:     ISRestoreIndices(zerodiag,&idxs);
2346:     ISGetIndices(pcis->is_B_local,&idxs);
2347:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2348:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2349:     ISRestoreIndices(pcis->is_B_local,&idxs);
2350:     if (dirIS) {
2351:       PetscInt n;

2353:       ISGetLocalSize(dirIS,&n);
2354:       ISGetIndices(dirIS,&idxs);
2355:       for (i=0;i<n;i++) vals[i] = 0.;
2356:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2357:       ISRestoreIndices(dirIS,&idxs);
2358:     }
2359:     VecAssemblyBegin(pcis->vec2_N);
2360:     VecAssemblyEnd(pcis->vec2_N);
2361:     VecDuplicate(pcis->vec1_N,&vec3_N);
2362:     VecSet(vec3_N,0.);
2363:     MatISGetLocalMat(pc->pmat,&A);
    /* vals[0] = (A p_0, v_I) */
2364:     MatMult(A,pcis->vec1_N,vec3_N);
2365:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2366:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2367:     PetscFree(vals);
2368:     VecDestroy(&vec3_N);

2370:     /* there should not be any pressure dofs lying on the interface */
2371:     PetscCalloc1(pcis->n,&count);
2372:     ISGetIndices(pcis->is_B_local,&idxs);
2373:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2374:     ISRestoreIndices(pcis->is_B_local,&idxs);
2375:     ISGetIndices(zerodiag,&idxs);
2376:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2377:     ISRestoreIndices(zerodiag,&idxs);
2378:     PetscFree(count);
2379:   }
2380:   ISDestroy(&dirIS);

2382:   /* check PCBDDCBenignGetOrSetP0 */
  /* store a recognizable value per entry, overwrite the local copy with 1, read back and compare */
2383:   VecSetRandom(pcis->vec1_global,NULL);
2384:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2385:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2386:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2387:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2388:   for (i=0;i<pcbddc->benign_n;i++) {
2389:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2390:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g\n",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2391:   }
2392:   return(0);
2393: }

2395: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2396: {
2397:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2398:   IS             pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2399:   PetscInt       nz,n;
2400:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2401:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

2405:   PetscSFDestroy(&pcbddc->benign_sf);
2406:   MatDestroy(&pcbddc->benign_B0);
2407:   for (n=0;n<pcbddc->benign_n;n++) {
2408:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2409:   }
2410:   PetscFree(pcbddc->benign_zerodiag_subs);
2411:   pcbddc->benign_n = 0;

2413:   /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2414:      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2415:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2416:      If not, a change of basis on pressures is not needed
2417:      since the local Schur complements are already SPD
2418:   */
2419:   has_null_pressures = PETSC_TRUE;
2420:   have_null = PETSC_TRUE;
2421:   if (pcbddc->n_ISForDofsLocal) {
2422:     IS       iP = NULL;
2423:     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;

2425:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2426:     PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2427:     PetscOptionsEnd();
2428:     if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2429:     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2430:     ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2431:     ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2432:     ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2433:     ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2434:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2435:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2436:     if (iP) {
2437:       IS newpressures;

2439:       ISDifference(pressures,iP,&newpressures);
2440:       ISDestroy(&pressures);
2441:       pressures = newpressures;
2442:     }
2443:     ISSorted(pressures,&sorted);
2444:     if (!sorted) {
2445:       ISSort(pressures);
2446:     }
2447:   } else {
2448:     pressures = NULL;
2449:   }
2450:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2451:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2452:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2453:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2454:   ISSorted(zerodiag,&sorted);
2455:   if (!sorted) {
2456:     ISSort(zerodiag);
2457:   }
2458:   PetscObjectReference((PetscObject)zerodiag);
2459:   zerodiag_save = zerodiag;
2460:   ISGetLocalSize(zerodiag,&nz);
2461:   if (!nz) {
2462:     if (n) have_null = PETSC_FALSE;
2463:     has_null_pressures = PETSC_FALSE;
2464:     ISDestroy(&zerodiag);
2465:   }
2466:   recompute_zerodiag = PETSC_FALSE;
2467:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2468:   zerodiag_subs    = NULL;
2469:   pcbddc->benign_n = 0;
2470:   n_interior_dofs  = 0;
2471:   interior_dofs    = NULL;
2472:   nneu             = 0;
2473:   if (pcbddc->NeumannBoundariesLocal) {
2474:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2475:   }
2476:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2477:   if (checkb) { /* need to compute interior nodes */
2478:     PetscInt n,i,j;
2479:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2480:     PetscInt *iwork;

2482:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2483:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2484:     PetscCalloc1(n,&iwork);
2485:     PetscMalloc1(n,&interior_dofs);
2486:     for (i=1;i<n_neigh;i++)
2487:       for (j=0;j<n_shared[i];j++)
2488:           iwork[shared[i][j]] += 1;
2489:     for (i=0;i<n;i++)
2490:       if (!iwork[i])
2491:         interior_dofs[n_interior_dofs++] = i;
2492:     PetscFree(iwork);
2493:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2494:   }
2495:   if (has_null_pressures) {
2496:     IS             *subs;
2497:     PetscInt       nsubs,i,j,nl;
2498:     const PetscInt *idxs;
2499:     PetscScalar    *array;
2500:     Vec            *work;
2501:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2503:     subs  = pcbddc->local_subs;
2504:     nsubs = pcbddc->n_local_subs;
2505:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2506:     if (checkb) {
2507:       VecDuplicateVecs(matis->y,2,&work);
2508:       ISGetLocalSize(zerodiag,&nl);
2509:       ISGetIndices(zerodiag,&idxs);
2510:       /* work[0] = 1_p */
2511:       VecSet(work[0],0.);
2512:       VecGetArray(work[0],&array);
2513:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2514:       VecRestoreArray(work[0],&array);
2515:       /* work[0] = 1_v */
2516:       VecSet(work[1],1.);
2517:       VecGetArray(work[1],&array);
2518:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2519:       VecRestoreArray(work[1],&array);
2520:       ISRestoreIndices(zerodiag,&idxs);
2521:     }
2522:     if (nsubs > 1) {
2523:       PetscCalloc1(nsubs,&zerodiag_subs);
2524:       for (i=0;i<nsubs;i++) {
2525:         ISLocalToGlobalMapping l2g;
2526:         IS                     t_zerodiag_subs;
2527:         PetscInt               nl;

2529:         ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2530:         ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2531:         ISGetLocalSize(t_zerodiag_subs,&nl);
2532:         if (nl) {
2533:           PetscBool valid = PETSC_TRUE;

2535:           if (checkb) {
2536:             VecSet(matis->x,0);
2537:             ISGetLocalSize(subs[i],&nl);
2538:             ISGetIndices(subs[i],&idxs);
2539:             VecGetArray(matis->x,&array);
2540:             for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2541:             VecRestoreArray(matis->x,&array);
2542:             ISRestoreIndices(subs[i],&idxs);
2543:             VecPointwiseMult(matis->x,work[0],matis->x);
2544:             MatMult(matis->A,matis->x,matis->y);
2545:             VecPointwiseMult(matis->y,work[1],matis->y);
2546:             VecGetArray(matis->y,&array);
2547:             for (j=0;j<n_interior_dofs;j++) {
2548:               if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2549:                 valid = PETSC_FALSE;
2550:                 break;
2551:               }
2552:             }
2553:             VecRestoreArray(matis->y,&array);
2554:           }
2555:           if (valid && nneu) {
2556:             const PetscInt *idxs;
2557:             PetscInt       nzb;

2559:             ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2560:             ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2561:             ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2562:             if (nzb) valid = PETSC_FALSE;
2563:           }
2564:           if (valid && pressures) {
2565:             IS t_pressure_subs;
2566:             ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2567:             ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2568:             ISDestroy(&t_pressure_subs);
2569:           }
2570:           if (valid) {
2571:             ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2572:             pcbddc->benign_n++;
2573:           } else {
2574:             recompute_zerodiag = PETSC_TRUE;
2575:           }
2576:         }
2577:         ISDestroy(&t_zerodiag_subs);
2578:         ISLocalToGlobalMappingDestroy(&l2g);
2579:       }
2580:     } else { /* there's just one subdomain (or zero if they have not been detected */
2581:       PetscBool valid = PETSC_TRUE;

2583:       if (nneu) valid = PETSC_FALSE;
2584:       if (valid && pressures) {
2585:         ISEqual(pressures,zerodiag,&valid);
2586:       }
2587:       if (valid && checkb) {
2588:         MatMult(matis->A,work[0],matis->x);
2589:         VecPointwiseMult(matis->x,work[1],matis->x);
2590:         VecGetArray(matis->x,&array);
2591:         for (j=0;j<n_interior_dofs;j++) {
2592:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2593:             valid = PETSC_FALSE;
2594:             break;
2595:           }
2596:         }
2597:         VecRestoreArray(matis->x,&array);
2598:       }
2599:       if (valid) {
2600:         pcbddc->benign_n = 1;
2601:         PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2602:         PetscObjectReference((PetscObject)zerodiag);
2603:         zerodiag_subs[0] = zerodiag;
2604:       }
2605:     }
2606:     if (checkb) {
2607:       VecDestroyVecs(2,&work);
2608:     }
2609:   }
2610:   PetscFree(interior_dofs);

2612:   if (!pcbddc->benign_n) {
2613:     PetscInt n;

2615:     ISDestroy(&zerodiag);
2616:     recompute_zerodiag = PETSC_FALSE;
2617:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2618:     if (n) {
2619:       has_null_pressures = PETSC_FALSE;
2620:       have_null = PETSC_FALSE;
2621:     }
2622:   }

2624:   /* final check for null pressures */
2625:   if (zerodiag && pressures) {
2626:     PetscInt nz,np;
2627:     ISGetLocalSize(zerodiag,&nz);
2628:     ISGetLocalSize(pressures,&np);
2629:     if (nz != np) have_null = PETSC_FALSE;
2630:   }

2632:   if (recompute_zerodiag) {
2633:     ISDestroy(&zerodiag);
2634:     if (pcbddc->benign_n == 1) {
2635:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2636:       zerodiag = zerodiag_subs[0];
2637:     } else {
2638:       PetscInt i,nzn,*new_idxs;

2640:       nzn = 0;
2641:       for (i=0;i<pcbddc->benign_n;i++) {
2642:         PetscInt ns;
2643:         ISGetLocalSize(zerodiag_subs[i],&ns);
2644:         nzn += ns;
2645:       }
2646:       PetscMalloc1(nzn,&new_idxs);
2647:       nzn = 0;
2648:       for (i=0;i<pcbddc->benign_n;i++) {
2649:         PetscInt ns,*idxs;
2650:         ISGetLocalSize(zerodiag_subs[i],&ns);
2651:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2652:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2653:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2654:         nzn += ns;
2655:       }
2656:       PetscSortInt(nzn,new_idxs);
2657:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2658:     }
2659:     have_null = PETSC_FALSE;
2660:   }

2662:   /* Prepare matrix to compute no-net-flux */
2663:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2664:     Mat                    A,loc_divudotp;
2665:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2666:     IS                     row,col,isused = NULL;
2667:     PetscInt               M,N,n,st,n_isused;

2669:     if (pressures) {
2670:       isused = pressures;
2671:     } else {
2672:       isused = zerodiag_save;
2673:     }
2674:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2675:     MatISGetLocalMat(pc->pmat,&A);
2676:     MatGetLocalSize(A,&n,NULL);
2677:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2678:     n_isused = 0;
2679:     if (isused) {
2680:       ISGetLocalSize(isused,&n_isused);
2681:     }
2682:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2683:     st = st-n_isused;
2684:     if (n) {
2685:       const PetscInt *gidxs;

2687:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2688:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2689:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2690:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2691:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2692:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2693:     } else {
2694:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2695:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2696:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2697:     }
2698:     MatGetSize(pc->pmat,NULL,&N);
2699:     ISGetSize(row,&M);
2700:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2701:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2702:     ISDestroy(&row);
2703:     ISDestroy(&col);
2704:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2705:     MatSetType(pcbddc->divudotp,MATIS);
2706:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2707:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2708:     ISLocalToGlobalMappingDestroy(&rl2g);
2709:     ISLocalToGlobalMappingDestroy(&cl2g);
2710:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2711:     MatDestroy(&loc_divudotp);
2712:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2713:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2714:   }
2715:   ISDestroy(&zerodiag_save);

2717:   /* change of basis and p0 dofs */
2718:   if (has_null_pressures) {
2719:     IS             zerodiagc;
2720:     const PetscInt *idxs,*idxsc;
2721:     PetscInt       i,s,*nnz;

2723:     ISGetLocalSize(zerodiag,&nz);
2724:     ISComplement(zerodiag,0,n,&zerodiagc);
2725:     ISGetIndices(zerodiagc,&idxsc);
2726:     /* local change of basis for pressures */
2727:     MatDestroy(&pcbddc->benign_change);
2728:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2729:     MatSetType(pcbddc->benign_change,MATAIJ);
2730:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2731:     PetscMalloc1(n,&nnz);
2732:     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2733:     for (i=0;i<pcbddc->benign_n;i++) {
2734:       PetscInt nzs,j;

2736:       ISGetLocalSize(zerodiag_subs[i],&nzs);
2737:       ISGetIndices(zerodiag_subs[i],&idxs);
2738:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2739:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2740:       ISRestoreIndices(zerodiag_subs[i],&idxs);
2741:     }
2742:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2743:     PetscFree(nnz);
2744:     /* set identity on velocities */
2745:     for (i=0;i<n-nz;i++) {
2746:       MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2747:     }
2748:     ISRestoreIndices(zerodiagc,&idxsc);
2749:     ISDestroy(&zerodiagc);
2750:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2751:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2752:     /* set change on pressures */
2753:     for (s=0;s<pcbddc->benign_n;s++) {
2754:       PetscScalar *array;
2755:       PetscInt    nzs;

2757:       ISGetLocalSize(zerodiag_subs[s],&nzs);
2758:       ISGetIndices(zerodiag_subs[s],&idxs);
2759:       for (i=0;i<nzs-1;i++) {
2760:         PetscScalar vals[2];
2761:         PetscInt    cols[2];

2763:         cols[0] = idxs[i];
2764:         cols[1] = idxs[nzs-1];
2765:         vals[0] = 1.;
2766:         vals[1] = 1.;
2767:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2768:       }
2769:       PetscMalloc1(nzs,&array);
2770:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2771:       array[nzs-1] = 1.;
2772:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2773:       /* store local idxs for p0 */
2774:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2775:       ISRestoreIndices(zerodiag_subs[s],&idxs);
2776:       PetscFree(array);
2777:     }
2778:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2779:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2780:     /* project if needed */
2781:     if (pcbddc->benign_change_explicit) {
2782:       Mat M;

2784:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2785:       MatDestroy(&pcbddc->local_mat);
2786:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2787:       MatDestroy(&M);
2788:     }
2789:     /* store global idxs for p0 */
2790:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2791:   }
2792:   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2793:   ISDestroy(&pressures);

2795:   /* determines if the coarse solver will be singular or not */
2796:   MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2797:   /* determines if the problem has subdomains with 0 pressure block */
2798:   MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2799:   *zerodiaglocal = zerodiag;
2800:   return(0);
2801: }

2803: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2804: {
2805:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2806:   PetscScalar    *array;

2810:   if (!pcbddc->benign_sf) {
2811:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2812:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2813:   }
2814:   if (get) {
2815:     VecGetArrayRead(v,(const PetscScalar**)&array);
2816:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2817:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2818:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2819:   } else {
2820:     VecGetArray(v,&array);
2821:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2822:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2823:     VecRestoreArray(v,&array);
2824:   }
2825:   return(0);
2826: }

2828: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2829: {
2830:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

2834:   /* TODO: add error checking
2835:     - avoid nested pop (or push) calls.
2836:     - cannot push before pop.
2837:     - cannot call this if pcbddc->local_mat is NULL
2838:   */
2839:   if (!pcbddc->benign_n) {
2840:     return(0);
2841:   }
2842:   if (pop) {
2843:     if (pcbddc->benign_change_explicit) {
2844:       IS       is_p0;
2845:       MatReuse reuse;

2847:       /* extract B_0 */
2848:       reuse = MAT_INITIAL_MATRIX;
2849:       if (pcbddc->benign_B0) {
2850:         reuse = MAT_REUSE_MATRIX;
2851:       }
2852:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2853:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2854:       /* remove rows and cols from local problem */
2855:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2856:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2857:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2858:       ISDestroy(&is_p0);
2859:     } else {
2860:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
2861:       PetscScalar *vals;
2862:       PetscInt    i,n,*idxs_ins;

2864:       VecGetLocalSize(matis->y,&n);
2865:       PetscMalloc2(n,&idxs_ins,n,&vals);
2866:       if (!pcbddc->benign_B0) {
2867:         PetscInt *nnz;
2868:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2869:         MatSetType(pcbddc->benign_B0,MATAIJ);
2870:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2871:         PetscMalloc1(pcbddc->benign_n,&nnz);
2872:         for (i=0;i<pcbddc->benign_n;i++) {
2873:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2874:           nnz[i] = n - nnz[i];
2875:         }
2876:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2877:         PetscFree(nnz);
2878:       }

2880:       for (i=0;i<pcbddc->benign_n;i++) {
2881:         PetscScalar *array;
2882:         PetscInt    *idxs,j,nz,cum;

2884:         VecSet(matis->x,0.);
2885:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2886:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2887:         for (j=0;j<nz;j++) vals[j] = 1.;
2888:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2889:         VecAssemblyBegin(matis->x);
2890:         VecAssemblyEnd(matis->x);
2891:         VecSet(matis->y,0.);
2892:         MatMult(matis->A,matis->x,matis->y);
2893:         VecGetArray(matis->y,&array);
2894:         cum = 0;
2895:         for (j=0;j<n;j++) {
2896:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2897:             vals[cum] = array[j];
2898:             idxs_ins[cum] = j;
2899:             cum++;
2900:           }
2901:         }
2902:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
2903:         VecRestoreArray(matis->y,&array);
2904:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2905:       }
2906:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2907:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2908:       PetscFree2(idxs_ins,vals);
2909:     }
2910:   } else { /* push */
2911:     if (pcbddc->benign_change_explicit) {
2912:       PetscInt i;

2914:       for (i=0;i<pcbddc->benign_n;i++) {
2915:         PetscScalar *B0_vals;
2916:         PetscInt    *B0_cols,B0_ncol;

2918:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2919:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
2920:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
2921:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
2922:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2923:       }
2924:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2925:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2926:     } else {
2927:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
2928:     }
2929:   }
2930:   return(0);
2931: }

2933: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2934: {
2935:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2936:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2937:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
2938:   PetscBLASInt    *B_iwork,*B_ifail;
2939:   PetscScalar     *work,lwork;
2940:   PetscScalar     *St,*S,*eigv;
2941:   PetscScalar     *Sarray,*Starray;
2942:   PetscReal       *eigs,thresh;
2943:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
2944:   PetscBool       allocated_S_St;
2945: #if defined(PETSC_USE_COMPLEX)
2946:   PetscReal       *rwork;
2947: #endif
2948:   PetscErrorCode  ierr;

2951:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
2952:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
2953:   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef);

2955:   if (pcbddc->dbg_flag) {
2956:     PetscViewerFlush(pcbddc->dbg_viewer);
2957:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
2958:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
2959:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
2960:   }

2962:   if (pcbddc->dbg_flag) {
2963:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
2964:   }

2966:   /* max size of subsets */
2967:   mss = 0;
2968:   for (i=0;i<sub_schurs->n_subs;i++) {
2969:     PetscInt subset_size;

2971:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2972:     mss = PetscMax(mss,subset_size);
2973:   }

2975:   /* min/max and threshold */
2976:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
2977:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
2978:   nmax = PetscMax(nmin,nmax);
2979:   allocated_S_St = PETSC_FALSE;
2980:   if (nmin) {
2981:     allocated_S_St = PETSC_TRUE;
2982:   }

2984:   /* allocate lapack workspace */
2985:   cum = cum2 = 0;
2986:   maxneigs = 0;
2987:   for (i=0;i<sub_schurs->n_subs;i++) {
2988:     PetscInt n,subset_size;

2990:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
2991:     n = PetscMin(subset_size,nmax);
2992:     cum += subset_size;
2993:     cum2 += subset_size*n;
2994:     maxneigs = PetscMax(maxneigs,n);
2995:   }
2996:   if (mss) {
2997:     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
2998:       PetscBLASInt B_itype = 1;
2999:       PetscBLASInt B_N = mss;
3000:       PetscReal    zero = 0.0;
3001:       PetscReal    eps = 0.0; /* dlamch? */

3003:       B_lwork = -1;
3004:       S = NULL;
3005:       St = NULL;
3006:       eigs = NULL;
3007:       eigv = NULL;
3008:       B_iwork = NULL;
3009:       B_ifail = NULL;
3010: #if defined(PETSC_USE_COMPLEX)
3011:       rwork = NULL;
3012: #endif
3013:       thresh = 1.0;
3014:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3015: #if defined(PETSC_USE_COMPLEX)
3016:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3017: #else
3018:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3019: #endif
3020:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3021:       PetscFPTrapPop();
3022:     } else {
3023:         /* TODO */
3024:     }
3025:   } else {
3026:     lwork = 0;
3027:   }

3029:   nv = 0;
3030:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3031:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3032:   }
3033:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3034:   if (allocated_S_St) {
3035:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3036:   }
3037:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3038: #if defined(PETSC_USE_COMPLEX)
3039:   PetscMalloc1(7*mss,&rwork);
3040: #endif
3041:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3042:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3043:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3044:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3045:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3046:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3048:   maxneigs = 0;
3049:   cum = cumarray = 0;
3050:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3051:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3052:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3053:     const PetscInt *idxs;

3055:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3056:     for (cum=0;cum<nv;cum++) {
3057:       pcbddc->adaptive_constraints_n[cum] = 1;
3058:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3059:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3060:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3061:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3062:     }
3063:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3064:   }

3066:   if (mss) { /* multilevel */
3067:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3068:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3069:   }

3071:   thresh = pcbddc->adaptive_threshold;
3072:   for (i=0;i<sub_schurs->n_subs;i++) {
3073:     const PetscInt *idxs;
3074:     PetscReal      upper,lower;
3075:     PetscInt       j,subset_size,eigs_start = 0;
3076:     PetscBLASInt   B_N;
3077:     PetscBool      same_data = PETSC_FALSE;

3079:     if (pcbddc->use_deluxe_scaling) {
3080:       upper = PETSC_MAX_REAL;
3081:       lower = thresh;
3082:     } else {
3083:       upper = 1./thresh;
3084:       lower = 0.;
3085:     }
3086:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3087:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3088:     PetscBLASIntCast(subset_size,&B_N);
3089:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3090:       if (sub_schurs->is_hermitian) {
3091:         PetscInt j,k;
3092:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3093:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3094:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3095:         }
3096:         for (j=0;j<subset_size;j++) {
3097:           for (k=j;k<subset_size;k++) {
3098:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3099:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3100:           }
3101:         }
3102:       } else {
3103:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3104:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3105:       }
3106:     } else {
3107:       S = Sarray + cumarray;
3108:       St = Starray + cumarray;
3109:     }
3110:     /* see if we can save some work */
3111:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3112:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3113:     }

3115:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3116:       B_neigs = 0;
3117:     } else {
3118:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
3119:         PetscBLASInt B_itype = 1;
3120:         PetscBLASInt B_IL, B_IU;
3121:         PetscReal    eps = -1.0; /* dlamch? */
3122:         PetscInt     nmin_s;
3123:         PetscBool    compute_range = PETSC_FALSE;

3125:         if (pcbddc->dbg_flag) {
3126:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]);
3127:         }

3129:         compute_range = PETSC_FALSE;
3130:         if (thresh > 1.+PETSC_SMALL && !same_data) {
3131:           compute_range = PETSC_TRUE;
3132:         }

3134:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3135:         if (compute_range) {

3137:           /* ask for eigenvalues larger than thresh */
3138: #if defined(PETSC_USE_COMPLEX)
3139:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3140: #else
3141:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3142: #endif
3143:         } else if (!same_data) {
3144:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3145:           B_IL = 1;
3146: #if defined(PETSC_USE_COMPLEX)
3147:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3148: #else
3149:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3150: #endif
3151:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3152:           PetscInt k;
3153:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3154:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3155:           PetscBLASIntCast(nmax,&B_neigs);
3156:           nmin = nmax;
3157:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3158:           for (k=0;k<nmax;k++) {
3159:             eigs[k] = 1./PETSC_SMALL;
3160:             eigv[k*(subset_size+1)] = 1.0;
3161:           }
3162:         }
3163:         PetscFPTrapPop();
3164:         if (B_ierr) {
3165:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3166:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3167:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3168:         }

3170:         if (B_neigs > nmax) {
3171:           if (pcbddc->dbg_flag) {
3172:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3173:           }
3174:           if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax;
3175:           B_neigs = nmax;
3176:         }

3178:         nmin_s = PetscMin(nmin,B_N);
3179:         if (B_neigs < nmin_s) {
3180:           PetscBLASInt B_neigs2;

3182:           if (pcbddc->use_deluxe_scaling) {
3183:             B_IL = B_N - nmin_s + 1;
3184:             B_IU = B_N - B_neigs;
3185:           } else {
3186:             B_IL = B_neigs + 1;
3187:             B_IU = nmin_s;
3188:           }
3189:           if (pcbddc->dbg_flag) {
3190:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3191:           }
3192:           if (sub_schurs->is_hermitian) {
3193:             PetscInt j,k;
3194:             for (j=0;j<subset_size;j++) {
3195:               for (k=j;k<subset_size;k++) {
3196:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3197:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3198:               }
3199:             }
3200:           } else {
3201:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3202:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3203:           }
3204:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3205: #if defined(PETSC_USE_COMPLEX)
3206:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3207: #else
3208:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3209: #endif
3210:           PetscFPTrapPop();
3211:           B_neigs += B_neigs2;
3212:         }
3213:         if (B_ierr) {
3214:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3215:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3216:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3217:         }
3218:         if (pcbddc->dbg_flag) {
3219:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3220:           for (j=0;j<B_neigs;j++) {
3221:             if (eigs[j] == 0.0) {
3222:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3223:             } else {
3224:               if (pcbddc->use_deluxe_scaling) {
3225:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3226:               } else {
3227:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3228:               }
3229:             }
3230:           }
3231:         }
3232:       } else {
3233:           /* TODO */
3234:       }
3235:     }
3236:     /* change the basis back to the original one */
3237:     if (sub_schurs->change) {
3238:       Mat change,phi,phit;

3240:       if (pcbddc->dbg_flag > 1) {
3241:         PetscInt ii;
3242:         for (ii=0;ii<B_neigs;ii++) {
3243:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3244:           for (j=0;j<B_N;j++) {
3245: #if defined(PETSC_USE_COMPLEX)
3246:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3247:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3248:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3249: #else
3250:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3251: #endif
3252:           }
3253:         }
3254:       }
3255:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3256:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3257:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3258:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3259:       MatDestroy(&phit);
3260:       MatDestroy(&phi);
3261:     }
3262:     maxneigs = PetscMax(B_neigs,maxneigs);
3263:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3264:     if (B_neigs) {
3265:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3267:       if (pcbddc->dbg_flag > 1) {
3268:         PetscInt ii;
3269:         for (ii=0;ii<B_neigs;ii++) {
3270:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3271:           for (j=0;j<B_N;j++) {
3272: #if defined(PETSC_USE_COMPLEX)
3273:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3274:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3275:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3276: #else
3277:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3278: #endif
3279:           }
3280:         }
3281:       }
3282:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3283:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3284:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3285:       cum++;
3286:     }
3287:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3288:     /* shift for next computation */
3289:     cumarray += subset_size*subset_size;
3290:   }
3291:   if (pcbddc->dbg_flag) {
3292:     PetscViewerFlush(pcbddc->dbg_viewer);
3293:   }

3295:   if (mss) {
3296:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3297:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3298:     /* destroy matrices (junk) */
3299:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3300:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3301:   }
3302:   if (allocated_S_St) {
3303:     PetscFree2(S,St);
3304:   }
3305:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3306: #if defined(PETSC_USE_COMPLEX)
3307:   PetscFree(rwork);
3308: #endif
3309:   if (pcbddc->dbg_flag) {
3310:     PetscInt maxneigs_r;
3311:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3312:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3313:   }
3314:   return(0);
3315: }

3317: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3318: {
3319:   PetscScalar    *coarse_submat_vals;

3323:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3324:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3325:   PCBDDCSetUpLocalScatters(pc);

3327:   /* Setup local neumann solver ksp_R */
3328:   /* PCBDDCSetUpLocalScatters should be called first! */
3329:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3331:   /*
3332:      Setup local correction and local part of coarse basis.
3333:      Gives back the dense local part of the coarse matrix in column major ordering
3334:   */
3335:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3337:   /* Compute total number of coarse nodes and setup coarse solver */
3338:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3340:   /* free */
3341:   PetscFree(coarse_submat_vals);
3342:   return(0);
3343: }

3345: PetscErrorCode PCBDDCResetCustomization(PC pc)
3346: {
3347:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3351:   ISDestroy(&pcbddc->user_primal_vertices);
3352:   ISDestroy(&pcbddc->user_primal_vertices_local);
3353:   ISDestroy(&pcbddc->NeumannBoundaries);
3354:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3355:   ISDestroy(&pcbddc->DirichletBoundaries);
3356:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3357:   PetscFree(pcbddc->onearnullvecs_state);
3358:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3359:   PCBDDCSetDofsSplitting(pc,0,NULL);
3360:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3361:   return(0);
3362: }

3364: PetscErrorCode PCBDDCResetTopography(PC pc)
3365: {
3366:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3367:   PetscInt       i;

3371:   MatDestroy(&pcbddc->nedcG);
3372:   ISDestroy(&pcbddc->nedclocal);
3373:   MatDestroy(&pcbddc->discretegradient);
3374:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3375:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3376:   MatDestroy(&pcbddc->switch_static_change);
3377:   VecDestroy(&pcbddc->work_change);
3378:   MatDestroy(&pcbddc->ConstraintMatrix);
3379:   MatDestroy(&pcbddc->divudotp);
3380:   ISDestroy(&pcbddc->divudotp_vl2l);
3381:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3382:   for (i=0;i<pcbddc->n_local_subs;i++) {
3383:     ISDestroy(&pcbddc->local_subs[i]);
3384:   }
3385:   pcbddc->n_local_subs = 0;
3386:   PetscFree(pcbddc->local_subs);
3387:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3388:   pcbddc->graphanalyzed        = PETSC_FALSE;
3389:   pcbddc->recompute_topography = PETSC_TRUE;
3390:   return(0);
3391: }

3393: PetscErrorCode PCBDDCResetSolvers(PC pc)
3394: {
3395:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3399:   VecDestroy(&pcbddc->coarse_vec);
3400:   if (pcbddc->coarse_phi_B) {
3401:     PetscScalar *array;
3402:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3403:     PetscFree(array);
3404:   }
3405:   MatDestroy(&pcbddc->coarse_phi_B);
3406:   MatDestroy(&pcbddc->coarse_phi_D);
3407:   MatDestroy(&pcbddc->coarse_psi_B);
3408:   MatDestroy(&pcbddc->coarse_psi_D);
3409:   VecDestroy(&pcbddc->vec1_P);
3410:   VecDestroy(&pcbddc->vec1_C);
3411:   MatDestroy(&pcbddc->local_auxmat2);
3412:   MatDestroy(&pcbddc->local_auxmat1);
3413:   VecDestroy(&pcbddc->vec1_R);
3414:   VecDestroy(&pcbddc->vec2_R);
3415:   ISDestroy(&pcbddc->is_R_local);
3416:   VecScatterDestroy(&pcbddc->R_to_B);
3417:   VecScatterDestroy(&pcbddc->R_to_D);
3418:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3419:   KSPReset(pcbddc->ksp_D);
3420:   KSPReset(pcbddc->ksp_R);
3421:   KSPReset(pcbddc->coarse_ksp);
3422:   MatDestroy(&pcbddc->local_mat);
3423:   PetscFree(pcbddc->primal_indices_local_idxs);
3424:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3425:   PetscFree(pcbddc->global_primal_indices);
3426:   ISDestroy(&pcbddc->coarse_subassembling);
3427:   MatDestroy(&pcbddc->benign_change);
3428:   VecDestroy(&pcbddc->benign_vec);
3429:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3430:   MatDestroy(&pcbddc->benign_B0);
3431:   PetscSFDestroy(&pcbddc->benign_sf);
3432:   if (pcbddc->benign_zerodiag_subs) {
3433:     PetscInt i;
3434:     for (i=0;i<pcbddc->benign_n;i++) {
3435:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3436:     }
3437:     PetscFree(pcbddc->benign_zerodiag_subs);
3438:   }
3439:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3440:   return(0);
3441: }

3443: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3444: {
3445:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3446:   PC_IS          *pcis = (PC_IS*)pc->data;
3447:   VecType        impVecType;
3448:   PetscInt       n_constraints,n_R,old_size;

3452:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3453:   n_R = pcis->n - pcbddc->n_vertices;
3454:   VecGetType(pcis->vec1_N,&impVecType);
3455:   /* local work vectors (try to avoid unneeded work)*/
3456:   /* R nodes */
3457:   old_size = -1;
3458:   if (pcbddc->vec1_R) {
3459:     VecGetSize(pcbddc->vec1_R,&old_size);
3460:   }
3461:   if (n_R != old_size) {
3462:     VecDestroy(&pcbddc->vec1_R);
3463:     VecDestroy(&pcbddc->vec2_R);
3464:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3465:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3466:     VecSetType(pcbddc->vec1_R,impVecType);
3467:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3468:   }
3469:   /* local primal dofs */
3470:   old_size = -1;
3471:   if (pcbddc->vec1_P) {
3472:     VecGetSize(pcbddc->vec1_P,&old_size);
3473:   }
3474:   if (pcbddc->local_primal_size != old_size) {
3475:     VecDestroy(&pcbddc->vec1_P);
3476:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3477:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3478:     VecSetType(pcbddc->vec1_P,impVecType);
3479:   }
3480:   /* local explicit constraints */
3481:   old_size = -1;
3482:   if (pcbddc->vec1_C) {
3483:     VecGetSize(pcbddc->vec1_C,&old_size);
3484:   }
3485:   if (n_constraints && n_constraints != old_size) {
3486:     VecDestroy(&pcbddc->vec1_C);
3487:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3488:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3489:     VecSetType(pcbddc->vec1_C,impVecType);
3490:   }
3491:   return(0);
3492: }

3494: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3495: {
3496:   PetscErrorCode  ierr;
3497:   /* pointers to pcis and pcbddc */
3498:   PC_IS*          pcis = (PC_IS*)pc->data;
3499:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3500:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3501:   /* submatrices of local problem */
3502:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3503:   /* submatrices of local coarse problem */
3504:   Mat             S_VV,S_CV,S_VC,S_CC;
3505:   /* working matrices */
3506:   Mat             C_CR;
3507:   /* additional working stuff */
3508:   PC              pc_R;
3509:   Mat             F,Brhs = NULL;
3510:   Vec             dummy_vec;
3511:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3512:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3513:   PetscScalar     *work;
3514:   PetscInt        *idx_V_B;
3515:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3516:   PetscInt        i,n_R,n_D,n_B;

3518:   /* some shortcuts to scalars */
3519:   PetscScalar     one=1.0,m_one=-1.0;

3522:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");

3524:   /* Set Non-overlapping dimensions */
3525:   n_vertices = pcbddc->n_vertices;
3526:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3527:   n_B = pcis->n_B;
3528:   n_D = pcis->n - n_B;
3529:   n_R = pcis->n - n_vertices;

3531:   /* vertices in boundary numbering */
3532:   PetscMalloc1(n_vertices,&idx_V_B);
3533:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3534:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);

3536:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3537:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3538:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3539:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3540:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3541:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3542:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3543:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3544:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3545:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3547:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3548:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3549:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3550:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3551:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3552:   lda_rhs = n_R;
3553:   need_benign_correction = PETSC_FALSE;
3554:   if (isLU || isILU || isCHOL) {
3555:     PCFactorGetMatrix(pc_R,&F);
3556:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3557:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3558:     MatFactorType      type;

3560:     F = reuse_solver->F;
3561:     MatGetFactorType(F,&type);
3562:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3563:     MatGetSize(F,&lda_rhs,NULL);
3564:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3565:   } else {
3566:     F = NULL;
3567:   }

3569:   /* determine if we can use a sparse right-hand side */
3570:   sparserhs = PETSC_FALSE;
3571:   if (F) {
3572:     const MatSolverPackage solver;

3574:     MatFactorGetSolverPackage(F,&solver);
3575:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3576:   }

3578:   /* allocate workspace */
3579:   n = 0;
3580:   if (n_constraints) {
3581:     n += lda_rhs*n_constraints;
3582:   }
3583:   if (n_vertices) {
3584:     n = PetscMax(2*lda_rhs*n_vertices,n);
3585:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3586:   }
3587:   if (!pcbddc->symmetric_primal) {
3588:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3589:   }
3590:   PetscMalloc1(n,&work);

3592:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3593:   dummy_vec = NULL;
3594:   if (need_benign_correction && lda_rhs != n_R && F) {
3595:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3596:   }

3598:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3599:   if (n_constraints) {
3600:     Mat         M1,M2,M3,C_B;
3601:     IS          is_aux;
3602:     PetscScalar *array,*array2;

3604:     MatDestroy(&pcbddc->local_auxmat1);
3605:     MatDestroy(&pcbddc->local_auxmat2);

3607:     /* Extract constraints on R nodes: C_{CR}  */
3608:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3609:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3610:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3612:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3613:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3614:     if (!sparserhs) {
3615:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3616:       for (i=0;i<n_constraints;i++) {
3617:         const PetscScalar *row_cmat_values;
3618:         const PetscInt    *row_cmat_indices;
3619:         PetscInt          size_of_constraint,j;

3621:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3622:         for (j=0;j<size_of_constraint;j++) {
3623:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3624:         }
3625:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3626:       }
3627:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3628:     } else {
3629:       Mat tC_CR;

3631:       MatScale(C_CR,-1.0);
3632:       if (lda_rhs != n_R) {
3633:         PetscScalar *aa;
3634:         PetscInt    r,*ii,*jj;
3635:         PetscBool   done;

3637:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3638:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3639:         MatSeqAIJGetArray(C_CR,&aa);
3640:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3641:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3642:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3643:       } else {
3644:         PetscObjectReference((PetscObject)C_CR);
3645:         tC_CR = C_CR;
3646:       }
3647:       MatCreateTranspose(tC_CR,&Brhs);
3648:       MatDestroy(&tC_CR);
3649:     }
3650:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3651:     if (F) {
3652:       if (need_benign_correction) {
3653:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3655:         /* rhs is already zero on interior dofs, no need to change the rhs */
3656:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3657:       }
3658:       MatMatSolve(F,Brhs,local_auxmat2_R);
3659:       if (need_benign_correction) {
3660:         PetscScalar        *marr;
3661:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3663:         MatDenseGetArray(local_auxmat2_R,&marr);
3664:         if (lda_rhs != n_R) {
3665:           for (i=0;i<n_constraints;i++) {
3666:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3667:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3668:             VecResetArray(dummy_vec);
3669:           }
3670:         } else {
3671:           for (i=0;i<n_constraints;i++) {
3672:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3673:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3674:             VecResetArray(pcbddc->vec1_R);
3675:           }
3676:         }
3677:         MatDenseRestoreArray(local_auxmat2_R,&marr);
3678:       }
3679:     } else {
3680:       PetscScalar *marr;

3682:       MatDenseGetArray(local_auxmat2_R,&marr);
3683:       for (i=0;i<n_constraints;i++) {
3684:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3685:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3686:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3687:         VecResetArray(pcbddc->vec1_R);
3688:         VecResetArray(pcbddc->vec2_R);
3689:       }
3690:       MatDenseRestoreArray(local_auxmat2_R,&marr);
3691:     }
3692:     if (sparserhs) {
3693:       MatScale(C_CR,-1.0);
3694:     }
3695:     MatDestroy(&Brhs);
3696:     if (!pcbddc->switch_static) {
3697:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3698:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
3699:       MatDenseGetArray(local_auxmat2_R,&array2);
3700:       for (i=0;i<n_constraints;i++) {
3701:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
3702:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
3703:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3704:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3705:         VecResetArray(pcis->vec1_B);
3706:         VecResetArray(pcbddc->vec1_R);
3707:       }
3708:       MatDenseRestoreArray(local_auxmat2_R,&array2);
3709:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
3710:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3711:     } else {
3712:       if (lda_rhs != n_R) {
3713:         IS dummy;

3715:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
3716:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
3717:         ISDestroy(&dummy);
3718:       } else {
3719:         PetscObjectReference((PetscObject)local_auxmat2_R);
3720:         pcbddc->local_auxmat2 = local_auxmat2_R;
3721:       }
3722:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3723:     }
3724:     ISDestroy(&is_aux);
3725:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
3726:     MatScale(M3,m_one);
3727:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
3728:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
3729:     if (isCHOL) {
3730:       MatCholeskyFactor(M3,NULL,NULL);
3731:     } else {
3732:       MatLUFactor(M3,NULL,NULL,NULL);
3733:     }
3734:     VecSet(pcbddc->vec1_C,one);
3735:     MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
3736:     MatMatSolve(M3,M2,M1);
3737:     MatDestroy(&M2);
3738:     MatDestroy(&M3);
3739:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3740:     MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
3741:     MatDestroy(&C_B);
3742:     MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3743:     MatDestroy(&M1);
3744:   }

3746:   /* Get submatrices from subdomain matrix */
3747:   if (n_vertices) {
3748:     IS        is_aux;
3749:     PetscBool isseqaij;

3751:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3752:       IS tis;

3754:       ISDuplicate(pcbddc->is_R_local,&tis);
3755:       ISSort(tis);
3756:       ISComplement(tis,0,pcis->n,&is_aux);
3757:       ISDestroy(&tis);
3758:     } else {
3759:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
3760:     }
3761:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
3762:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
3763:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
3764:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
3765:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
3766:     }
3767:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
3768:     ISDestroy(&is_aux);
3769:   }

3771:   /* Matrix of coarse basis functions (local) */
3772:   if (pcbddc->coarse_phi_B) {
3773:     PetscInt on_B,on_primal,on_D=n_D;
3774:     if (pcbddc->coarse_phi_D) {
3775:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
3776:     }
3777:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
3778:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
3779:       PetscScalar *marray;

3781:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
3782:       PetscFree(marray);
3783:       MatDestroy(&pcbddc->coarse_phi_B);
3784:       MatDestroy(&pcbddc->coarse_psi_B);
3785:       MatDestroy(&pcbddc->coarse_phi_D);
3786:       MatDestroy(&pcbddc->coarse_psi_D);
3787:     }
3788:   }

3790:   if (!pcbddc->coarse_phi_B) {
3791:     PetscScalar *marr;

3793:     /* memory size */
3794:     n = n_B*pcbddc->local_primal_size;
3795:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
3796:     if (!pcbddc->symmetric_primal) n *= 2;
3797:     PetscCalloc1(n,&marr);
3798:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
3799:     marr += n_B*pcbddc->local_primal_size;
3800:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
3801:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
3802:       marr += n_D*pcbddc->local_primal_size;
3803:     }
3804:     if (!pcbddc->symmetric_primal) {
3805:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
3806:       marr += n_B*pcbddc->local_primal_size;
3807:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
3808:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
3809:       }
3810:     } else {
3811:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
3812:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
3813:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
3814:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
3815:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
3816:       }
3817:     }
3818:   }

3820:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
3821:   p0_lidx_I = NULL;
3822:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
3823:     const PetscInt *idxs;

3825:     ISGetIndices(pcis->is_I_local,&idxs);
3826:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
3827:     for (i=0;i<pcbddc->benign_n;i++) {
3828:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
3829:     }
3830:     ISRestoreIndices(pcis->is_I_local,&idxs);
3831:   }

3833:   /* vertices */
3834:   if (n_vertices) {
3835:     PetscBool restoreavr = PETSC_FALSE;

3837:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

3839:     if (n_R) {
3840:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
3841:       PetscBLASInt B_N,B_one = 1;
3842:       PetscScalar  *x,*y;

3844:       MatScale(A_RV,m_one);
3845:       if (need_benign_correction) {
3846:         ISLocalToGlobalMapping RtoN;
3847:         IS                     is_p0;
3848:         PetscInt               *idxs_p0,n;

3850:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
3851:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
3852:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
3853:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
3854:         ISLocalToGlobalMappingDestroy(&RtoN);
3855:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
3856:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
3857:         ISDestroy(&is_p0);
3858:       }

3860:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
3861:       if (!sparserhs || need_benign_correction) {
3862:         if (lda_rhs == n_R) {
3863:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
3864:         } else {
3865:           PetscScalar    *av,*array;
3866:           const PetscInt *xadj,*adjncy;
3867:           PetscInt       n;
3868:           PetscBool      flg_row;

3870:           array = work+lda_rhs*n_vertices;
3871:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
3872:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
3873:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3874:           MatSeqAIJGetArray(A_RV,&av);
3875:           for (i=0;i<n;i++) {
3876:             PetscInt j;
3877:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
3878:           }
3879:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
3880:           MatDestroy(&A_RV);
3881:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
3882:         }
3883:         if (need_benign_correction) {
3884:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3885:           PetscScalar        *marr;

3887:           MatDenseGetArray(A_RV,&marr);
3888:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

3890:                  | 0 0  0 | (V)
3891:              L = | 0 0 -1 | (P-p0)
3892:                  | 0 0 -1 | (p0)

3894:           */
3895:           for (i=0;i<reuse_solver->benign_n;i++) {
3896:             const PetscScalar *vals;
3897:             const PetscInt    *idxs,*idxs_zero;
3898:             PetscInt          n,j,nz;

3900:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
3901:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3902:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
3903:             for (j=0;j<n;j++) {
3904:               PetscScalar val = vals[j];
3905:               PetscInt    k,col = idxs[j];
3906:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
3907:             }
3908:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
3909:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
3910:           }
3911:           MatDenseRestoreArray(A_RV,&marr);
3912:         }
3913:         PetscObjectReference((PetscObject)A_RV);
3914:         Brhs = A_RV;
3915:       } else {
3916:         Mat tA_RVT,A_RVT;

3918:         if (!pcbddc->symmetric_primal) {
3919:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
3920:         } else {
3921:           restoreavr = PETSC_TRUE;
3922:           MatScale(A_VR,-1.0);
3923:           PetscObjectReference((PetscObject)A_VR);
3924:           A_RVT = A_VR;
3925:         }
3926:         if (lda_rhs != n_R) {
3927:           PetscScalar *aa;
3928:           PetscInt    r,*ii,*jj;
3929:           PetscBool   done;

3931:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3932:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3933:           MatSeqAIJGetArray(A_RVT,&aa);
3934:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
3935:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3936:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3937:         } else {
3938:           PetscObjectReference((PetscObject)A_RVT);
3939:           tA_RVT = A_RVT;
3940:         }
3941:         MatCreateTranspose(tA_RVT,&Brhs);
3942:         MatDestroy(&tA_RVT);
3943:         MatDestroy(&A_RVT);
3944:       }
3945:       if (F) {
3946:         /* need to correct the rhs */
3947:         if (need_benign_correction) {
3948:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3949:           PetscScalar        *marr;

3951:           MatDenseGetArray(Brhs,&marr);
3952:           if (lda_rhs != n_R) {
3953:             for (i=0;i<n_vertices;i++) {
3954:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3955:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
3956:               VecResetArray(dummy_vec);
3957:             }
3958:           } else {
3959:             for (i=0;i<n_vertices;i++) {
3960:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3961:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
3962:               VecResetArray(pcbddc->vec1_R);
3963:             }
3964:           }
3965:           MatDenseRestoreArray(Brhs,&marr);
3966:         }
3967:         MatMatSolve(F,Brhs,A_RRmA_RV);
3968:         if (restoreavr) {
3969:           MatScale(A_VR,-1.0);
3970:         }
3971:         /* need to correct the solution */
3972:         if (need_benign_correction) {
3973:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3974:           PetscScalar        *marr;

3976:           MatDenseGetArray(A_RRmA_RV,&marr);
3977:           if (lda_rhs != n_R) {
3978:             for (i=0;i<n_vertices;i++) {
3979:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3980:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3981:               VecResetArray(dummy_vec);
3982:             }
3983:           } else {
3984:             for (i=0;i<n_vertices;i++) {
3985:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3986:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3987:               VecResetArray(pcbddc->vec1_R);
3988:             }
3989:           }
3990:           MatDenseRestoreArray(A_RRmA_RV,&marr);
3991:         }
3992:       } else {
3993:         MatDenseGetArray(Brhs,&y);
3994:         for (i=0;i<n_vertices;i++) {
3995:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
3996:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
3997:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3998:           VecResetArray(pcbddc->vec1_R);
3999:           VecResetArray(pcbddc->vec2_R);
4000:         }
4001:         MatDenseRestoreArray(Brhs,&y);
4002:       }
4003:       MatDestroy(&A_RV);
4004:       MatDestroy(&Brhs);
4005:       /* S_VV and S_CV */
4006:       if (n_constraints) {
4007:         Mat B;

4009:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4010:         for (i=0;i<n_vertices;i++) {
4011:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4012:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4013:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4014:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4015:           VecResetArray(pcis->vec1_B);
4016:           VecResetArray(pcbddc->vec1_R);
4017:         }
4018:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4019:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4020:         MatDestroy(&B);
4021:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4022:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4023:         MatScale(S_CV,m_one);
4024:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4025:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4026:         MatDestroy(&B);
4027:       }
4028:       if (lda_rhs != n_R) {
4029:         MatDestroy(&A_RRmA_RV);
4030:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4031:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4032:       }
4033:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4034:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4035:       if (need_benign_correction) {
4036:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4037:         PetscScalar      *marr,*sums;

4039:         PetscMalloc1(n_vertices,&sums);
4040:         MatDenseGetArray(S_VVt,&marr);
4041:         for (i=0;i<reuse_solver->benign_n;i++) {
4042:           const PetscScalar *vals;
4043:           const PetscInt    *idxs,*idxs_zero;
4044:           PetscInt          n,j,nz;

4046:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4047:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4048:           for (j=0;j<n_vertices;j++) {
4049:             PetscInt k;
4050:             sums[j] = 0.;
4051:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4052:           }
4053:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4054:           for (j=0;j<n;j++) {
4055:             PetscScalar val = vals[j];
4056:             PetscInt k;
4057:             for (k=0;k<n_vertices;k++) {
4058:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4059:             }
4060:           }
4061:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4062:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4063:         }
4064:         PetscFree(sums);
4065:         MatDenseRestoreArray(S_VVt,&marr);
4066:         MatDestroy(&A_RV_bcorr);
4067:       }
4068:       MatDestroy(&A_RRmA_RV);
4069:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4070:       MatDenseGetArray(A_VV,&x);
4071:       MatDenseGetArray(S_VVt,&y);
4072:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4073:       MatDenseRestoreArray(A_VV,&x);
4074:       MatDenseRestoreArray(S_VVt,&y);
4075:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4076:       MatDestroy(&S_VVt);
4077:     } else {
4078:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4079:     }
4080:     MatDestroy(&A_VV);

4082:     /* coarse basis functions */
4083:     for (i=0;i<n_vertices;i++) {
4084:       PetscScalar *y;

4086:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4087:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4088:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4089:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4090:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4091:       y[n_B*i+idx_V_B[i]] = 1.0;
4092:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4093:       VecResetArray(pcis->vec1_B);

4095:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4096:         PetscInt j;

4098:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4099:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4100:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4101:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4102:         VecResetArray(pcis->vec1_D);
4103:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4104:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4105:       }
4106:       VecResetArray(pcbddc->vec1_R);
4107:     }
4108:     /* if n_R == 0 the object is not destroyed */
4109:     MatDestroy(&A_RV);
4110:   }
4111:   VecDestroy(&dummy_vec);

4113:   if (n_constraints) {
4114:     Mat B;

4116:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4117:     MatScale(S_CC,m_one);
4118:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4119:     MatScale(S_CC,m_one);
4120:     if (n_vertices) {
4121:       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4122:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4123:       } else {
4124:         Mat S_VCt;

4126:         if (lda_rhs != n_R) {
4127:           MatDestroy(&B);
4128:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4129:           MatSeqDenseSetLDA(B,lda_rhs);
4130:         }
4131:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4132:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4133:         MatDestroy(&S_VCt);
4134:       }
4135:     }
4136:     MatDestroy(&B);
4137:     /* coarse basis functions */
4138:     for (i=0;i<n_constraints;i++) {
4139:       PetscScalar *y;

4141:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4142:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4143:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4144:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4145:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4146:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4147:       VecResetArray(pcis->vec1_B);
4148:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4149:         PetscInt j;

4151:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4152:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4153:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4154:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4155:         VecResetArray(pcis->vec1_D);
4156:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4157:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4158:       }
4159:       VecResetArray(pcbddc->vec1_R);
4160:     }
4161:   }
4162:   if (n_constraints) {
4163:     MatDestroy(&local_auxmat2_R);
4164:   }
4165:   PetscFree(p0_lidx_I);

4167:   /* coarse matrix entries relative to B_0 */
4168:   if (pcbddc->benign_n) {
4169:     Mat         B0_B,B0_BPHI;
4170:     IS          is_dummy;
4171:     PetscScalar *data;
4172:     PetscInt    j;

4174:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4175:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4176:     ISDestroy(&is_dummy);
4177:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4178:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4179:     MatDenseGetArray(B0_BPHI,&data);
4180:     for (j=0;j<pcbddc->benign_n;j++) {
4181:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4182:       for (i=0;i<pcbddc->local_primal_size;i++) {
4183:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4184:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4185:       }
4186:     }
4187:     MatDenseRestoreArray(B0_BPHI,&data);
4188:     MatDestroy(&B0_B);
4189:     MatDestroy(&B0_BPHI);
4190:   }

4192:   /* compute other basis functions for non-symmetric problems */
4193:   if (!pcbddc->symmetric_primal) {
4194:     Mat         B_V=NULL,B_C=NULL;
4195:     PetscScalar *marray;

4197:     if (n_constraints) {
4198:       Mat S_CCT,C_CRT;

4200:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4201:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4202:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4203:       MatDestroy(&S_CCT);
4204:       if (n_vertices) {
4205:         Mat S_VCT;

4207:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4208:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4209:         MatDestroy(&S_VCT);
4210:       }
4211:       MatDestroy(&C_CRT);
4212:     } else {
4213:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4214:     }
4215:     if (n_vertices && n_R) {
4216:       PetscScalar    *av,*marray;
4217:       const PetscInt *xadj,*adjncy;
4218:       PetscInt       n;
4219:       PetscBool      flg_row;

4221:       /* B_V = B_V - A_VR^T */
4222:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4223:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4224:       MatSeqAIJGetArray(A_VR,&av);
4225:       MatDenseGetArray(B_V,&marray);
4226:       for (i=0;i<n;i++) {
4227:         PetscInt j;
4228:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4229:       }
4230:       MatDenseRestoreArray(B_V,&marray);
4231:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4232:       MatDestroy(&A_VR);
4233:     }

4235:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4236:     if (n_vertices) {
4237:       MatDenseGetArray(B_V,&marray);
4238:       for (i=0;i<n_vertices;i++) {
4239:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4240:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4241:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4242:         VecResetArray(pcbddc->vec1_R);
4243:         VecResetArray(pcbddc->vec2_R);
4244:       }
4245:       MatDenseRestoreArray(B_V,&marray);
4246:     }
4247:     if (B_C) {
4248:       MatDenseGetArray(B_C,&marray);
4249:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4250:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4251:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4252:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4253:         VecResetArray(pcbddc->vec1_R);
4254:         VecResetArray(pcbddc->vec2_R);
4255:       }
4256:       MatDenseRestoreArray(B_C,&marray);
4257:     }
4258:     /* coarse basis functions */
4259:     for (i=0;i<pcbddc->local_primal_size;i++) {
4260:       PetscScalar *y;

4262:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4263:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4264:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4265:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4266:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4267:       if (i<n_vertices) {
4268:         y[n_B*i+idx_V_B[i]] = 1.0;
4269:       }
4270:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4271:       VecResetArray(pcis->vec1_B);

4273:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4274:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4275:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4276:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4277:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4278:         VecResetArray(pcis->vec1_D);
4279:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4280:       }
4281:       VecResetArray(pcbddc->vec1_R);
4282:     }
4283:     MatDestroy(&B_V);
4284:     MatDestroy(&B_C);
4285:   }

4287:   /* free memory */
4288:   PetscFree(idx_V_B);
4289:   MatDestroy(&S_VV);
4290:   MatDestroy(&S_CV);
4291:   MatDestroy(&S_VC);
4292:   MatDestroy(&S_CC);
4293:   PetscFree(work);
4294:   if (n_vertices) {
4295:     MatDestroy(&A_VR);
4296:   }
4297:   if (n_constraints) {
4298:     MatDestroy(&C_CR);
4299:   }
4300:   /* Checking coarse_sub_mat and coarse basis functions */
4301:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4302:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4303:   if (pcbddc->dbg_flag) {
4304:     Mat         coarse_sub_mat;
4305:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4306:     Mat         coarse_phi_D,coarse_phi_B;
4307:     Mat         coarse_psi_D,coarse_psi_B;
4308:     Mat         A_II,A_BB,A_IB,A_BI;
4309:     Mat         C_B,CPHI;
4310:     IS          is_dummy;
4311:     Vec         mones;
4312:     MatType     checkmattype=MATSEQAIJ;
4313:     PetscReal   real_value;

4315:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4316:       Mat A;
4317:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4318:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4319:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4320:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4321:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4322:       MatDestroy(&A);
4323:     } else {
4324:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4325:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4326:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4327:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4328:     }
4329:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4330:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4331:     if (!pcbddc->symmetric_primal) {
4332:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4333:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4334:     }
4335:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4337:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4338:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4339:     PetscViewerFlush(pcbddc->dbg_viewer);
4340:     if (!pcbddc->symmetric_primal) {
4341:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4342:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4343:       MatDestroy(&AUXMAT);
4344:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4345:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4346:       MatDestroy(&AUXMAT);
4347:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4348:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4349:       MatDestroy(&AUXMAT);
4350:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4351:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4352:       MatDestroy(&AUXMAT);
4353:     } else {
4354:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4355:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4356:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4357:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4358:       MatDestroy(&AUXMAT);
4359:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4360:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4361:       MatDestroy(&AUXMAT);
4362:     }
4363:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4364:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4365:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4366:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4367:     if (pcbddc->benign_n) {
4368:       Mat         B0_B,B0_BPHI;
4369:       PetscScalar *data,*data2;
4370:       PetscInt    j;

4372:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4373:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4374:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4375:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4376:       MatDenseGetArray(TM1,&data);
4377:       MatDenseGetArray(B0_BPHI,&data2);
4378:       for (j=0;j<pcbddc->benign_n;j++) {
4379:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4380:         for (i=0;i<pcbddc->local_primal_size;i++) {
4381:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4382:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4383:         }
4384:       }
4385:       MatDenseRestoreArray(TM1,&data);
4386:       MatDenseRestoreArray(B0_BPHI,&data2);
4387:       MatDestroy(&B0_B);
4388:       ISDestroy(&is_dummy);
4389:       MatDestroy(&B0_BPHI);
4390:     }
4391: #if 0
4392:   {
4393:     PetscViewer viewer;
4394:     char filename[256];
4395:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4396:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4397:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4398:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4399:     MatView(coarse_sub_mat,viewer);
4400:     PetscObjectSetName((PetscObject)TM1,"projected");
4401:     MatView(TM1,viewer);
4402:     if (save_change) {
4403:       Mat phi_B;
4404:       MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);
4405:       PetscObjectSetName((PetscObject)phi_B,"phi_B");
4406:       MatView(phi_B,viewer);
4407:       MatDestroy(&phi_B);
4408:     } else {
4409:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4410:       MatView(pcbddc->coarse_phi_B,viewer);
4411:     }
4412:     if (pcbddc->coarse_phi_D) {
4413:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4414:       MatView(pcbddc->coarse_phi_D,viewer);
4415:     }
4416:     if (pcbddc->coarse_psi_B) {
4417:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4418:       MatView(pcbddc->coarse_psi_B,viewer);
4419:     }
4420:     if (pcbddc->coarse_psi_D) {
4421:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4422:       MatView(pcbddc->coarse_psi_D,viewer);
4423:     }
4424:     PetscViewerDestroy(&viewer);
4425:   }
4426: #endif
4427:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4428:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4429:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4430:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4432:     /* check constraints */
4433:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4434:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4435:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4436:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4437:     } else {
4438:       PetscScalar *data;
4439:       Mat         tmat;
4440:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4441:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4442:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4443:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4444:       MatDestroy(&tmat);
4445:     }
4446:     MatCreateVecs(CPHI,&mones,NULL);
4447:     VecSet(mones,-1.0);
4448:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4449:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4450:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4451:     if (!pcbddc->symmetric_primal) {
4452:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4453:       VecSet(mones,-1.0);
4454:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4455:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4456:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4457:     }
4458:     MatDestroy(&C_B);
4459:     MatDestroy(&CPHI);
4460:     ISDestroy(&is_dummy);
4461:     VecDestroy(&mones);
4462:     PetscViewerFlush(pcbddc->dbg_viewer);
4463:     MatDestroy(&A_II);
4464:     MatDestroy(&A_BB);
4465:     MatDestroy(&A_IB);
4466:     MatDestroy(&A_BI);
4467:     MatDestroy(&TM1);
4468:     MatDestroy(&TM2);
4469:     MatDestroy(&TM3);
4470:     MatDestroy(&TM4);
4471:     MatDestroy(&coarse_phi_D);
4472:     MatDestroy(&coarse_phi_B);
4473:     if (!pcbddc->symmetric_primal) {
4474:       MatDestroy(&coarse_psi_D);
4475:       MatDestroy(&coarse_psi_B);
4476:     }
4477:     MatDestroy(&coarse_sub_mat);
4478:   }
4479:   /* get back data */
4480:   *coarse_submat_vals_n = coarse_submat_vals;
4481:   return(0);
4482: }

/*
   MatCreateSubMatrixUnsorted - extracts the submatrix of A selected by isrow/iscol,
   where the two index sets are allowed to be unsorted.

   Input:
     A     - matrix the submatrix is taken from
     isrow - row indices (checked with ISSorted; sorted copy is built if needed)
     iscol - column indices (same treatment; may be the same object as isrow)

   Output:
     B     - the extracted matrix; this routine takes a reference on it, so the
             caller is responsible for destroying it

   Outline:
     1. build sorted versions of the index sets, remembering the sorting permutations;
     2. extract the submatrix with the sorted index sets via MatCreateSubMatrices;
     3. if anything had to be sorted, apply MatPermute with the inverse of the
        sorting permutations (presumably to restore the ordering requested by
        the caller -- confirm against the MatPermute documentation).

   All auxiliary index sets are created on PETSC_COMM_SELF.

   NOTE(review): in this listing the return codes of the PETSc calls are not
   checked; confirm against the upstream source.
*/
4484: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4485: {
4486:   Mat            *work_mat;
4487:   IS             isrow_s,iscol_s;
4488:   PetscBool      rsorted,csorted;
4489:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4493:   ISSorted(isrow,&rsorted);
4494:   ISSorted(iscol,&csorted);
4495:   ISGetLocalSize(isrow,&rsize);
4496:   ISGetLocalSize(iscol,&csize);

        /* rows: build a sorted index set (isrow_s) and keep the sorting permutation in idxs_perm_r */
4498:   if (!rsorted) {
4499:     const PetscInt *idxs;
4500:     PetscInt *idxs_sorted,i;

4502:     PetscMalloc1(rsize,&idxs_perm_r);
4503:     PetscMalloc1(rsize,&idxs_sorted);
          /* start from the identity; PetscSortIntWithPermutation reorders idxs_perm_r, idxs itself is read-only */
4504:     for (i=0;i<rsize;i++) {
4505:       idxs_perm_r[i] = i;
4506:     }
4507:     ISGetIndices(isrow,&idxs);
4508:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4509:     for (i=0;i<rsize;i++) {
4510:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4511:     }
4512:     ISRestoreIndices(isrow,&idxs);
          /* idxs_sorted is handed over with PETSC_OWN_POINTER, hence it is not freed in this routine */
4513:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4514:   } else {
          /* already sorted: reuse the input IS; the extra reference balances the ISDestroy at the end */
4515:     PetscObjectReference((PetscObject)isrow);
4516:     isrow_s = isrow;
4517:   }

        /* columns: same as for the rows, except that the row result is shared when isrow and iscol are the same object */
4519:   if (!csorted) {
4520:     if (isrow == iscol) {
            /* idxs_perm_c stays NULL in this case; the row permutation is reused later on */
4521:       PetscObjectReference((PetscObject)isrow_s);
4522:       iscol_s = isrow_s;
4523:     } else {
4524:       const PetscInt *idxs;
4525:       PetscInt       *idxs_sorted,i;

4527:       PetscMalloc1(csize,&idxs_perm_c);
4528:       PetscMalloc1(csize,&idxs_sorted);
4529:       for (i=0;i<csize;i++) {
4530:         idxs_perm_c[i] = i;
4531:       }
4532:       ISGetIndices(iscol,&idxs);
4533:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4534:       for (i=0;i<csize;i++) {
4535:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4536:       }
4537:       ISRestoreIndices(iscol,&idxs);
4538:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4539:     }
4540:   } else {
4541:     PetscObjectReference((PetscObject)iscol);
4542:     iscol_s = iscol;
4543:   }

        /* extract with the sorted index sets; the result is returned as a one-element array of matrices */
4545:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

        /* if any of the index sets had to be sorted, permute the extracted matrix */
4547:   if (!rsorted || !csorted) {
4548:     Mat      new_mat;
4549:     IS       is_perm_r,is_perm_c;

4551:     if (!rsorted) {
4552:       PetscInt *idxs_r,i;
4553:       PetscMalloc1(rsize,&idxs_r);
            /* idxs_r is the inverse of the sorting permutation */
4554:       for (i=0;i<rsize;i++) {
4555:         idxs_r[idxs_perm_r[i]] = i;
4556:       }
4557:       PetscFree(idxs_perm_r);
4558:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4559:     } else {
            /* rows were already sorted: identity permutation */
4560:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4561:     }
4562:     ISSetPermutation(is_perm_r);

4564:     if (!csorted) {
4565:       if (isrow_s == iscol_s) {
              /* rows and columns share the same sorted IS, so they share the permutation too */
4566:         PetscObjectReference((PetscObject)is_perm_r);
4567:         is_perm_c = is_perm_r;
4568:       } else {
4569:         PetscInt *idxs_c,i;
              /* sanity check: the column sorting permutation must have been stored above */
4570:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4571:         PetscMalloc1(csize,&idxs_c);
4572:         for (i=0;i<csize;i++) {
4573:           idxs_c[idxs_perm_c[i]] = i;
4574:         }
4575:         PetscFree(idxs_perm_c);
4576:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4577:       }
4578:     } else {
4579:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4580:     }
4581:     ISSetPermutation(is_perm_c);

          /* replace the extracted matrix with its permuted version */
4583:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4584:     MatDestroy(&work_mat[0]);
4585:     work_mat[0] = new_mat;
4586:     ISDestroy(&is_perm_r);
4587:     ISDestroy(&is_perm_c);
4588:   }

        /* take a reference on the result before the work array is destroyed, so that *B remains valid for the caller */
4590:   PetscObjectReference((PetscObject)work_mat[0]);
4591:   *B = work_mat[0];
4592:   MatDestroyMatrices(1,&work_mat);
4593:   ISDestroy(&isrow_s);
4594:   ISDestroy(&iscol_s);
4595:   return(0);
4596: }

4598: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4599: {
4600:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4601:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4602:   Mat            new_mat,lA;
4603:   IS             is_local,is_global;
4604:   PetscInt       local_size;
4605:   PetscBool      isseqaij;

4609:   MatDestroy(&pcbddc->local_mat);
4610:   MatGetSize(matis->A,&local_size,NULL);
4611:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4612:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4613:   ISDestroy(&is_local);
4614:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4615:   ISDestroy(&is_global);

4617:   /* check */
4618:   if (pcbddc->dbg_flag) {
4619:     Vec       x,x_change;
4620:     PetscReal error;

4622:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4623:     VecSetRandom(x,NULL);
4624:     MatMult(ChangeOfBasisMatrix,x,x_change);
4625:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4626:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4627:     MatMult(new_mat,matis->x,matis->y);
4628:     if (!pcbddc->change_interior) {
4629:       const PetscScalar *x,*y,*v;
4630:       PetscReal         lerror = 0.;
4631:       PetscInt          i;

4633:       VecGetArrayRead(matis->x,&x);
4634:       VecGetArrayRead(matis->y,&y);
4635:       VecGetArrayRead(matis->counter,&v);
4636:       for (i=0;i<local_size;i++)
4637:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4638:           lerror = PetscAbsScalar(x[i]-y[i]);
4639:       VecRestoreArrayRead(matis->x,&x);
4640:       VecRestoreArrayRead(matis->y,&y);
4641:       VecRestoreArrayRead(matis->counter,&v);
4642:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4643:       if (error > PETSC_SMALL) {
4644:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4645:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4646:         } else {
4647:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4648:         }
4649:       }
4650:     }
4651:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4652:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4653:     VecAXPY(x,-1.0,x_change);
4654:     VecNorm(x,NORM_INFINITY,&error);
4655:     if (error > PETSC_SMALL) {
4656:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4657:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4658:       } else {
4659:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4660:       }
4661:     }
4662:     VecDestroy(&x);
4663:     VecDestroy(&x_change);
4664:   }

4666:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4667:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

4669:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4670:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4671:   if (isseqaij) {
4672:     MatDestroy(&pcbddc->local_mat);
4673:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4674:     if (lA) {
4675:       Mat work;
4676:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4677:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4678:       MatDestroy(&work);
4679:     }
4680:   } else {
4681:     Mat work_mat;

4683:     MatDestroy(&pcbddc->local_mat);
4684:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4685:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4686:     MatDestroy(&work_mat);
4687:     if (lA) {
4688:       Mat work;
4689:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4690:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4691:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4692:       MatDestroy(&work);
4693:     }
4694:   }
4695:   if (matis->A->symmetric_set) {
4696:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4697: #if !defined(PETSC_USE_COMPLEX)
4698:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4699: #endif
4700:   }
4701:   MatDestroy(&new_mat);
4702:   return(0);
4703: }

/*
   PCBDDCSetUpLocalScatters - (re)builds pcbddc->is_R_local and the local scatters
   pcbddc->R_to_B and (only if pcbddc->switch_static or pcbddc->dbg_flag is set) pcbddc->R_to_D.

   Input Parameter:
.  pc - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC

   Notes:
   R denotes the local dofs that are not primal vertices.
   The routine returns immediately, leaving the existing objects untouched, when the local
   primal space has not changed, local_primal_size is nonzero and debugging is off.
   When sub_schurs->reuse_solver is present the ordering of R is taken from reuse_solver->is_R,
   and that IS is then replaced by a new reference to pcbddc->is_R_local.
   PetscInt quantities in the debug output are printed with %D so that the output is also
   correct when PetscInt is a 64 bit integer.
*/
4705: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4706: {
4707:   PC_IS*          pcis = (PC_IS*)(pc->data);
4708:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
4709:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4710:   PetscInt        *idx_R_local=NULL;
4711:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
4712:   PetscInt        vbs,bs;
4713:   PetscBT         bitmask=NULL;
4714:   PetscErrorCode  ierr;

4717:   /*
4718:     No need to setup local scatters if
4719:       - primal space is unchanged
4720:         AND
4721:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4722:         AND
4723:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
4724:   */
4725:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
4726:     return(0);
4727:   }
4728:   /* destroy old objects */
4729:   ISDestroy(&pcbddc->is_R_local);
4730:   VecScatterDestroy(&pcbddc->R_to_B);
4731:   VecScatterDestroy(&pcbddc->R_to_D);
4732:   /* Set Non-overlapping dimensions */
4733:   n_B = pcis->n_B;
4734:   n_D = pcis->n - n_B;
4735:   n_vertices = pcbddc->n_vertices;

4737:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

4739:   /* create auxiliary bitmask and allocate workspace */
4740:   if (!sub_schurs || !sub_schurs->reuse_solver) {
4741:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
4742:     PetscBTCreate(pcis->n,&bitmask);
          /* mark the vertices; R is then collected as the unmarked local indices, in increasing order */
4743:     for (i=0;i<n_vertices;i++) {
4744:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
4745:     }

4747:     for (i=0, n_R=0; i<pcis->n; i++) {
4748:       if (!PetscBTLookup(bitmask,i)) {
4749:         idx_R_local[n_R++] = i;
4750:       }
4751:     }
4752:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
4753:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* here idx_R_local is borrowed from the IS (const cast away); it is given back with ISRestoreIndices below */
4755:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4756:     ISGetLocalSize(reuse_solver->is_R,&n_R);
4757:   }

4759:   /* Block code */
        /* vbs is the block size used for is_R_local: bs if R turns out to be made of full blocks, 1 otherwise */
4760:   vbs = 1;
4761:   MatGetBlockSize(pcbddc->local_mat,&bs);
4762:   if (bs>1 && !(n_vertices%bs)) {
4763:     PetscBool is_blocked = PETSC_TRUE;
4764:     PetscInt  *vary;
4765:     if (!sub_schurs || !sub_schurs->reuse_solver) {
4766:       PetscMalloc1(pcis->n/bs,&vary);
4767:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
4768:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
4769:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
            /* vary[k] counts the vertices falling in block k: each block must hold either none or all bs of them */
4770:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
4771:       for (i=0; i<pcis->n/bs; i++) {
4772:         if (vary[i]!=0 && vary[i]!=bs) {
4773:           is_blocked = PETSC_FALSE;
4774:           break;
4775:         }
4776:       }
4777:       PetscFree(vary);
4778:     } else {
4779:       /* Verify directly the R set */
            /* each group of bs consecutive entries must be node,node+1,...,node+bs-1;
               the break only leaves the inner loop, the outer loop keeps scanning with is_blocked already false */
4780:       for (i=0; i<n_R/bs; i++) {
4781:         PetscInt j,node=idx_R_local[bs*i];
4782:         for (j=1; j<bs; j++) {
4783:           if (node != idx_R_local[bs*i+j]-j) {
4784:             is_blocked = PETSC_FALSE;
4785:             break;
4786:           }
4787:         }
4788:       }
4789:     }
4790:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
4791:       vbs = bs;
            /* NOTE(review): with reuse_solver this overwrites, in place, the indices borrowed from
               reuse_solver->is_R; that IS is destroyed a few lines below - confirm no one else references it */
4792:       for (i=0;i<n_R/vbs;i++) {
4793:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
4794:       }
4795:     }
4796:   }
4797:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
4798:   if (sub_schurs && sub_schurs->reuse_solver) {
4799:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* replace reuse_solver->is_R with a reference to the newly created is_R_local */
4801:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
4802:     ISDestroy(&reuse_solver->is_R);
4803:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
4804:     reuse_solver->is_R = pcbddc->is_R_local;
4805:   } else {
4806:     PetscFree(idx_R_local);
4807:   }

4809:   /* print some info if requested */
4810:   if (pcbddc->dbg_flag) {
4811:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4812:     PetscViewerFlush(pcbddc->dbg_viewer);
4813:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4814:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
4815:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
4816:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
4817:     PetscViewerFlush(pcbddc->dbg_viewer);
4818:   }

4820:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
4821:   if (!sub_schurs || !sub_schurs->reuse_solver) {
4822:     IS       is_aux1,is_aux2;
4823:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

4825:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4826:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
4827:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
          /* the bitmask already marks the vertices: add the interior dofs, so that unmarked == boundary non-vertex dofs */
4828:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
4829:     for (i=0; i<n_D; i++) {
4830:       PetscBTSet(bitmask,is_indices[i]);
4831:     }
4832:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
          /* positions (in R numbering) of the unmarked dofs */
4833:     for (i=0, j=0; i<n_R; i++) {
4834:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
4835:         aux_array1[j++] = i;
4836:       }
4837:     }
4838:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
          /* positions (in B numbering) of the unmarked dofs */
4839:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4840:     for (i=0, j=0; i<n_B; i++) {
4841:       if (!PetscBTLookup(bitmask,is_indices[i])) {
4842:         aux_array2[j++] = i;
4843:       }
4844:     }
4845:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
4846:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
4847:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
4848:     ISDestroy(&is_aux1);
4849:     ISDestroy(&is_aux2);

4851:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* positions (in R numbering) of the marked dofs; R contains no vertices, so these are the interior ones */
4852:       PetscMalloc1(n_D,&aux_array1);
4853:       for (i=0, j=0; i<n_R; i++) {
4854:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
4855:           aux_array1[j++] = i;
4856:         }
4857:       }
4858:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
4859:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4860:       ISDestroy(&is_aux1);
4861:     }
4862:     PetscBTDestroy(&bitmask);
4863:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
4864:   } else {
4865:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4866:     IS                 tis;
4867:     PetscInt           schur_size;

          /* R_to_B maps the entries n_D,...,n_D+schur_size-1 of vec1_R to the entries reuse_solver->is_B of vec1_B;
             R_to_D maps the first n_D entries of vec1_R to vec1_D */
4869:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
4870:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
4871:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
4872:     ISDestroy(&tis);
4873:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4874:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
4875:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
4876:       ISDestroy(&tis);
4877:     }
4878:   }
4879:   return(0);
4880: }


/*
   PCBDDCSetUpLocalSolvers - sets up the local solvers pcbddc->ksp_D and/or pcbddc->ksp_R.

   Input Parameters:
+  pc        - the preconditioner; pc->data is accessed both as PC_BDDC and as PC_IS
.  dirichlet - if true, set up pcbddc->ksp_D with operator pcis->A_II (and store a reference in pcis->ksp_D)
-  neumann   - if true, set up pcbddc->ksp_R with operator A_RR, the is_R_local x is_R_local
               part of pcbddc->local_mat

   Notes:
   Newly created KSPs default to KSPPREONLY with PCCHOLESKY for MATSEQSBAIJ operators and PCLU
   otherwise, and then call KSPSetFromOptions with prefix <prefix>pc_bddc_dirichlet_ or
   <prefix>pc_bddc_neumann_ (followed by l<level>_ on coarser levels).
   If sub_schurs->reuse_solver is present its interior_solver/correction_solver PCs are attached
   to the KSPs instead.
   The symmetry flag of pcbddc->local_mat is propagated to A_II and A_RR by value: a matrix that has
   been explicitly flagged as not symmetric must not produce submatrices flagged as symmetric.
   With pcbddc->dbg_flag set, the infinity norm of x - K^-1 (K x) for a random x is printed for each solver.
*/
4883: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
4884: {
4885:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
4886:   PC_IS          *pcis = (PC_IS*)pc->data;
4887:   PC             pc_temp;
4888:   Mat            A_RR;
4889:   MatReuse       reuse;
4890:   PetscScalar    m_one = -1.0;
4891:   PetscReal      value;
4892:   PetscInt       n_D,n_R;
4893:   PetscBool      check_corr[2],issbaij;
4895:   /* prefixes stuff */
4896:   char           dir_prefix[256],neu_prefix[256],str_level[16];
4897:   size_t         len;


4901:   /* compute prefixes */
        /* NOTE(review): the copies below are not bounded by the size of the 256-char buffers - confirm prefixes cannot exceed it */
4902:   PetscStrcpy(dir_prefix,"");
4903:   PetscStrcpy(neu_prefix,"");
4904:   if (!pcbddc->current_level) {
4905:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
4906:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
4907:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4908:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4909:   } else {
          /* strip the trailing "pc_bddc_coarse_[lX_]" from the prefix of this (coarse) PC, then append the level tag */
4910:     PetscStrcpy(str_level,"");
4911:     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4912:     PetscStrlen(((PetscObject)pc)->prefix,&len);
4913:     len -= 15; /* remove "pc_bddc_coarse_" */
4914:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4915:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4916:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
4917:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
4918:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
4919:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
4920:     PetscStrcat(dir_prefix,str_level);
4921:     PetscStrcat(neu_prefix,str_level);
4922:   }

4924:   /* DIRICHLET PROBLEM */
4925:   if (dirichlet) {
4926:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4927:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4928:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
4929:       if (pcbddc->dbg_flag) {
4930:         Mat    A_IIn;

              /* replace A_II with the matrix returned by PCBDDCBenignProject on the interior dofs */
4932:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
4933:         MatDestroy(&pcis->A_II);
4934:         pcis->A_II = A_IIn;
4935:       }
4936:     }
          /* propagate the value of the symmetry flag (not the fact that it has been set) */
4937:     if (pcbddc->local_mat->symmetric_set) {
4938:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
4939:     }
4940:     /* Matrix for Dirichlet problem is pcis->A_II */
4941:     n_D = pcis->n - pcis->n_B;
4942:     if (!pcbddc->ksp_D) { /* create object if not yet build */
4943:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
4944:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
4945:       /* default */
4946:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
4947:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
4948:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
4949:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
4950:       if (issbaij) {
4951:         PCSetType(pc_temp,PCCHOLESKY);
4952:       } else {
4953:         PCSetType(pc_temp,PCLU);
4954:       }
4955:       /* Allow user's customization */
4956:       KSPSetFromOptions(pcbddc->ksp_D);
4957:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4958:     }
4959:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
4960:     if (sub_schurs && sub_schurs->reuse_solver) {
4961:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4963:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
4964:     }
4965:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
4966:     if (!n_D) {
4967:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
4968:       PCSetType(pc_temp,PCNONE);
4969:     }
4970:     /* Set Up KSP for Dirichlet problem of BDDC */
4971:     KSPSetUp(pcbddc->ksp_D);
4972:     /* set ksp_D into pcis data */
4973:     KSPDestroy(&pcis->ksp_D);
4974:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
4975:     pcis->ksp_D = pcbddc->ksp_D;
4976:   }

4978:   /* NEUMANN PROBLEM */
4979:   A_RR = 0;
4980:   if (neumann) {
4981:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4982:     PetscInt        ibs,mbs;
4983:     PetscBool       issbaij, reuse_neumann_solver;
4984:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

          /* the reused solver is employed for the Neumann problem only if no "__KSPFETIDP_iP" object is composed with sub_schurs->A */
4986:     reuse_neumann_solver = PETSC_FALSE;
4987:     if (sub_schurs && sub_schurs->reuse_solver) {
4988:       IS iP;

4990:       reuse_neumann_solver = PETSC_TRUE;
4991:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
4992:       if (iP) reuse_neumann_solver = PETSC_FALSE;
4993:     }
4994:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
4995:     ISGetSize(pcbddc->is_R_local,&n_R);
4996:     if (pcbddc->ksp_R) { /* already created ksp */
4997:       PetscInt nn_R;
            /* take our own reference on the current operator: every path below either destroys or reuses it */
4998:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
4999:       PetscObjectReference((PetscObject)A_RR);
5000:       MatGetSize(A_RR,&nn_R,NULL);
5001:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5002:         KSPReset(pcbddc->ksp_R);
5003:         MatDestroy(&A_RR);
5004:         reuse = MAT_INITIAL_MATRIX;
5005:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5006:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5007:           MatDestroy(&A_RR);
5008:           reuse = MAT_INITIAL_MATRIX;
5009:         } else { /* safe to reuse the matrix */
5010:           reuse = MAT_REUSE_MATRIX;
5011:         }
5012:       }
5013:       /* last check */
5014:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5015:         MatDestroy(&A_RR);
5016:         reuse = MAT_INITIAL_MATRIX;
5017:       }
5018:     } else { /* first time, so we need to create the matrix */
5019:       reuse = MAT_INITIAL_MATRIX;
5020:     }
5021:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5022:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5023:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5024:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
          /* when local_mat is the very same object as matis->A, convert into a new matrix instead of in place */
5025:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5026:       if (matis->A == pcbddc->local_mat) {
5027:         MatDestroy(&pcbddc->local_mat);
5028:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5029:       } else {
5030:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5031:       }
5032:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5033:       if (matis->A == pcbddc->local_mat) {
5034:         MatDestroy(&pcbddc->local_mat);
5035:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5036:       } else {
5037:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5038:       }
5039:     }
5040:     /* extract A_RR */
5041:     if (reuse_neumann_solver) {
5042:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5044:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5045:         MatDestroy(&A_RR);
5046:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5047:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5048:         } else {
5049:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5050:         }
5051:       } else {
              /* use the operator already attached to the reused correction solver */
5052:         MatDestroy(&A_RR);
5053:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5054:         PetscObjectReference((PetscObject)A_RR);
5055:       }
5056:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5057:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5058:     }
          /* propagate the value of the symmetry flag (not the fact that it has been set) */
5059:     if (pcbddc->local_mat->symmetric_set) {
5060:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5061:     }
5062:     if (!pcbddc->ksp_R) { /* create object if not present */
5063:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5064:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5065:       /* default */
5066:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5067:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5068:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5069:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5070:       if (issbaij) {
5071:         PCSetType(pc_temp,PCCHOLESKY);
5072:       } else {
5073:         PCSetType(pc_temp,PCLU);
5074:       }
5075:       /* Allow user's customization */
5076:       KSPSetFromOptions(pcbddc->ksp_R);
5077:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
5078:     }
5079:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5080:     if (!n_R) {
5081:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5082:       PCSetType(pc_temp,PCNONE);
5083:     }
5084:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5085:     /* Reuse solver if it is present */
5086:     if (reuse_neumann_solver) {
5087:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5089:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5090:     }
5091:     /* Set Up KSP for Neumann problem of BDDC */
5092:     KSPSetUp(pcbddc->ksp_R);
5093:   }

5095:   if (pcbddc->dbg_flag) {
5096:     PetscViewerFlush(pcbddc->dbg_viewer);
5097:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5098:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5099:   }

5101:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
        /* NullSpace_corr[0]/[2] request the correction for the Dirichlet/Neumann solver;
           NullSpace_corr[1]/[3] are forwarded as the last argument of PCBDDCNullSpaceAssembleCorrection */
5102:   check_corr[0] = check_corr[1] = PETSC_FALSE;
5103:   if (pcbddc->NullSpace_corr[0]) {
5104:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5105:   }
5106:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5107:     check_corr[0] = PETSC_TRUE;
5108:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5109:   }
5110:   if (neumann && pcbddc->NullSpace_corr[2]) {
5111:     check_corr[1] = PETSC_TRUE;
5112:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5113:   }

5115:   /* check Dirichlet and Neumann solvers */
5116:   if (pcbddc->dbg_flag) {
5117:     if (dirichlet) { /* Dirichlet */
            /* value = || x - A_II^-1 (A_II x) ||_inf for a random x */
5118:       VecSetRandom(pcis->vec1_D,NULL);
5119:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5120:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5121:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5122:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5123:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5124:       if (check_corr[0]) {
5125:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5126:       }
5127:       PetscViewerFlush(pcbddc->dbg_viewer);
5128:     }
5129:     if (neumann) { /* Neumann */
            /* value = || x - A_RR^-1 (A_RR x) ||_inf for a random x */
5130:       VecSetRandom(pcbddc->vec1_R,NULL);
5131:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5132:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5133:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5134:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5135:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5136:       if (check_corr[1]) {
5137:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5138:       }
5139:       PetscViewerFlush(pcbddc->dbg_viewer);
5140:     }
5141:   }
5142:   /* free Neumann problem's matrix */
5143:   MatDestroy(&A_RR);
5144:   return(0);
5145: }

/*
   PCBDDCSolveSubstructureCorrection - solves the local problem on the R dofs, in place on the given vectors.

   Input Parameters:
+  pc             - the preconditioner (pc->data is accessed as PC_BDDC)
.  inout_B        - on input the right-hand side on the B dofs; overwritten with the result
.  inout_D        - as inout_B for the D dofs; only accessed when pcbddc->switch_static is set
-  applytranspose - if true, the transposed solve is applied

   Notes:
   The right-hand side is moved into pcbddc->vec1_R through R_to_B (and R_to_D), the system is solved in
   place with pcbddc->ksp_R, and the result is moved back.
   When sub_schurs->reuse_solver is present and switch_static is off, the solve is instead performed with
   MatFactorSolveSchurComplement[Transpose] on reuse_solver->F, using reuse_solver->rhs_B and sol_B.
   If pcbddc->local_auxmat1 is present, an additional term built from local_auxmat1 and local_auxmat2 is
   added: after the solve for the non-transposed case, before the solve for the transposed one.
*/
5147: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5148: {
5149:   PetscErrorCode  ierr;
5150:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5151:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* true iff sub_schurs exists and carries a reuse_solver; note that inner blocks shadow this name with the struct pointer */
5152:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* ---- load the right-hand side ---- */
5155:   if (!reuse_solver) {
5156:     VecSet(pcbddc->vec1_R,0.);
5157:   }
5158:   if (!pcbddc->switch_static) {
5159:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T (local_auxmat2^T inout_B) */
5160:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5161:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5162:     }
5163:     if (!reuse_solver) {
5164:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5165:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5166:     } else {
5167:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5169:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5170:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5171:     }
5172:   } else {
          /* switch_static: vec1_R is filled from both inout_B and inout_D */
5173:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5174:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5175:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5176:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5177:     if (applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat1^T (local_auxmat2^T vec1_R), then refresh the B part of vec1_R */
5178:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5179:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5180:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5181:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5182:     }
5183:   }
        /* ---- solve ---- */
5184:   if (!reuse_solver || pcbddc->switch_static) {
5185:     if (applytranspose) {
5186:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5187:     } else {
5188:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5189:     }
5190:   } else {
5191:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5193:     if (applytranspose) {
5194:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5195:     } else {
5196:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5197:     }
5198:   }
        /* ---- store the result ---- */
5199:   VecSet(inout_B,0.);
5200:   if (!pcbddc->switch_static) {
5201:     if (!reuse_solver) {
5202:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5203:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5204:     } else {
5205:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5207:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5208:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5209:     }
5210:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* inout_B += local_auxmat2 (local_auxmat1 inout_B) */
5211:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5212:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5213:     }
5214:   } else {
5215:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5216:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5217:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5218:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5219:     if (!applytranspose && pcbddc->local_auxmat1) {
            /* vec1_R += local_auxmat2 (local_auxmat1 inout_B); the scatters below then refresh inout_B and inout_D */
5220:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5221:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5222:     }
5223:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5224:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5225:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5226:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5227:   }
5228:   return(0);
5229: }

/*
   PCBDDCApplyInterfacePreconditioner - applies the coarse correction plus the local correction on the R dofs.

   Input Parameters:
+  pc             - the preconditioner (pc->data is accessed both as PC_BDDC and as PC_IS)
-  applytranspose - selects the transposed application (the roles of coarse_phi and coarse_psi are swapped)

   Notes:
   Input and output are pcis->vec1_B (and pcis->vec1_D when pcbddc->switch_static is set).
   Steps: restrict to the coarse space into vec1_P, scatter to the coarse right-hand side, solve with
   pcbddc->coarse_ksp (on the processes that have it), solve the local problem with
   PCBDDCSolveSubstructureCorrection, scatter the coarse solution back into vec1_P and add its prolongation.
   When pcbddc->benign_apply_coarse_only is set, the restriction and the local solve are skipped and
   vec1_B is overwritten with the prolongation of the coarse solution.
*/
5231: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5232: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5233: {
5235:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5236:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5237:   const PetscScalar zero = 0.0;

5240:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5241:   if (!pcbddc->benign_apply_coarse_only) {
5242:     if (applytranspose) {
5243:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5244:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5245:     } else {
5246:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5247:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5248:     }
5249:   } else {
5250:     VecSet(pcbddc->vec1_P,zero);
5251:   }

5253:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
        /* the benign_n values of benign_p0 are added to the last benign_n entries of vec1_P */
5254:   if (pcbddc->benign_n) {
5255:     PetscScalar *array;
5256:     PetscInt    j;

5258:     VecGetArray(pcbddc->vec1_P,&array);
5259:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5260:     VecRestoreArray(pcbddc->vec1_P,&array);
5261:   }

5263:   /* start communications from local primal nodes to rhs of coarse solver */
5264:   VecSet(pcbddc->coarse_vec,zero);
5265:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5266:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5268:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5269:   if (pcbddc->coarse_ksp) {
5270:     Mat          coarse_mat;
5271:     Vec          rhs,sol;
5272:     MatNullSpace nullsp;
5273:     PetscBool    isbddc = PETSC_FALSE;

          /* isbddc is only ever set to true inside this branch, i.e. when benign_have_null is set and the coarse PC is a PCBDDC */
5275:     if (pcbddc->benign_have_null) {
5276:       PC        coarse_pc;

5278:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5279:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5280:       /* we need to propagate to coarser levels the need for a possible benign correction */
5281:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5282:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5283:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5284:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5285:       }
5286:     }
5287:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5288:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5289:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
          /* if the coarse operator has a null space attached, remove it from the rhs here and from the solution below */
5290:     MatGetNullSpace(coarse_mat,&nullsp);
5291:     if (nullsp) {
5292:       MatNullSpaceRemove(nullsp,rhs);
5293:     }
5294:     if (applytranspose) {
5295:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5296:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5297:     } else {
5298:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5299:         PC        coarse_pc;

5301:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5302:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5303:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5304:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5305:       } else {
5306:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5307:       }
5308:     }
5309:     /* we don't need the benign correction at coarser levels anymore */
5310:     if (pcbddc->benign_have_null && isbddc) {
5311:       PC        coarse_pc;
5312:       PC_BDDC*  coarsepcbddc;

5314:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5315:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5316:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5317:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5318:     }
5319:     if (nullsp) {
5320:       MatNullSpaceRemove(nullsp,sol);
5321:     }
5322:   }

5324:   /* Local solution on R nodes */
        /* skipped on subdomains with no local dofs (pcis->n == 0) */
5325:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5326:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5327:   }
5328:   /* communications from coarse sol to local primal nodes */
5329:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5330:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5332:   /* Sum contributions from the two levels */
5333:   if (!pcbddc->benign_apply_coarse_only) {
5334:     if (applytranspose) {
5335:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5336:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5337:     } else {
5338:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5339:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5340:     }
5341:     /* store p0 */
          /* benign_p0 is overwritten with the last benign_n entries of the coarse solution held in vec1_P */
5342:     if (pcbddc->benign_n) {
5343:       PetscScalar *array;
5344:       PetscInt    j;

5346:       VecGetArray(pcbddc->vec1_P,&array);
5347:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5348:       VecRestoreArray(pcbddc->vec1_P,&array);
5349:     }
5350:   } else { /* expand the coarse solution */
5351:     if (applytranspose) {
5352:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5353:     } else {
5354:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5355:     }
5356:   }
5357:   return(0);
5358: }

5360: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5361: {
5363:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5364:   PetscScalar    *array;
5365:   Vec            from,to;

5368:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5369:     from = pcbddc->coarse_vec;
5370:     to = pcbddc->vec1_P;
5371:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5372:       Vec tvec;

5374:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5375:       VecResetArray(tvec);
5376:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5377:       VecGetArray(tvec,&array);
5378:       VecPlaceArray(from,array);
5379:       VecRestoreArray(tvec,&array);
5380:     }
5381:   } else { /* from local to global -> put data in coarse right hand side */
5382:     from = pcbddc->vec1_P;
5383:     to = pcbddc->coarse_vec;
5384:   }
5385:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5386:   return(0);
5387: }

5389: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5390: {
5392:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5393:   PetscScalar    *array;
5394:   Vec            from,to;

5397:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5398:     from = pcbddc->coarse_vec;
5399:     to = pcbddc->vec1_P;
5400:   } else { /* from local to global -> put data in coarse right hand side */
5401:     from = pcbddc->vec1_P;
5402:     to = pcbddc->coarse_vec;
5403:   }
5404:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5405:   if (smode == SCATTER_FORWARD) {
5406:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5407:       Vec tvec;

5409:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5410:       VecGetArray(to,&array);
5411:       VecPlaceArray(tvec,array);
5412:       VecRestoreArray(to,&array);
5413:     }
5414:   } else {
5415:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5416:      VecResetArray(from);
5417:     }
5418:   }
5419:   return(0);
5420: }

5422: /* uncomment for testing purposes */
5423: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5424: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5425: {
5426:   PetscErrorCode    ierr;
5427:   PC_IS*            pcis = (PC_IS*)(pc->data);
5428:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5429:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5430:   /* one and zero */
5431:   PetscScalar       one=1.0,zero=0.0;
5432:   /* space to store constraints and their local indices */
5433:   PetscScalar       *constraints_data;
5434:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5435:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5436:   PetscInt          *constraints_n;
5437:   /* iterators */
5438:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5439:   /* BLAS integers */
5440:   PetscBLASInt      lwork,lierr;
5441:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5442:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5443:   /* reuse */
5444:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5445:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5446:   /* change of basis */
5447:   PetscBool         qr_needed;
5448:   PetscBT           change_basis,qr_needed_idx;
5449:   /* auxiliary stuff */
5450:   PetscInt          *nnz,*is_indices;
5451:   PetscInt          ncc;
5452:   /* some quantities */
5453:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5454:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;

5457:   /* Destroy Mat objects computed previously */
5458:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5459:   MatDestroy(&pcbddc->ConstraintMatrix);
5460:   MatDestroy(&pcbddc->switch_static_change);
5461:   /* save info on constraints from previous setup (if any) */
5462:   olocal_primal_size = pcbddc->local_primal_size;
5463:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5464:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5465:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5466:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5467:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5468:   PetscFree(pcbddc->primal_indices_local_idxs);

5470:   if (!pcbddc->adaptive_selection) {
5471:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5472:     MatNullSpace nearnullsp;
5473:     const Vec    *nearnullvecs;
5474:     Vec          *localnearnullsp;
5475:     PetscScalar  *array;
5476:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5477:     PetscBool    nnsp_has_cnst;
5478:     /* LAPACK working arrays for SVD or POD */
5479:     PetscBool    skip_lapack,boolforchange;
5480:     PetscScalar  *work;
5481:     PetscReal    *singular_vals;
5482: #if defined(PETSC_USE_COMPLEX)
5483:     PetscReal    *rwork;
5484: #endif
5485: #if defined(PETSC_MISSING_LAPACK_GESVD)
5486:     PetscScalar  *temp_basis,*correlation_mat;
5487: #else
5488:     PetscBLASInt dummy_int=1;
5489:     PetscScalar  dummy_scalar=1.;
5490: #endif

5492:     /* Get index sets for faces, edges and vertices from graph */
5493:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5494:     /* print some info */
5495:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5496:       PetscInt nv;

5498:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5499:       ISGetSize(ISForVertices,&nv);
5500:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5501:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5502:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5503:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5504:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5505:       PetscViewerFlush(pcbddc->dbg_viewer);
5506:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5507:     }

5509:     /* free unneeded index sets */
5510:     if (!pcbddc->use_vertices) {
5511:       ISDestroy(&ISForVertices);
5512:     }
5513:     if (!pcbddc->use_edges) {
5514:       for (i=0;i<n_ISForEdges;i++) {
5515:         ISDestroy(&ISForEdges[i]);
5516:       }
5517:       PetscFree(ISForEdges);
5518:       n_ISForEdges = 0;
5519:     }
5520:     if (!pcbddc->use_faces) {
5521:       for (i=0;i<n_ISForFaces;i++) {
5522:         ISDestroy(&ISForFaces[i]);
5523:       }
5524:       PetscFree(ISForFaces);
5525:       n_ISForFaces = 0;
5526:     }

5528:     /* check if near null space is attached to global mat */
5529:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
5530:     if (nearnullsp) {
5531:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5532:       /* remove any stored info */
5533:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5534:       PetscFree(pcbddc->onearnullvecs_state);
5535:       /* store information for BDDC solver reuse */
5536:       PetscObjectReference((PetscObject)nearnullsp);
5537:       pcbddc->onearnullspace = nearnullsp;
5538:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5539:       for (i=0;i<nnsp_size;i++) {
5540:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5541:       }
5542:     } else { /* if near null space is not provided BDDC uses constants by default */
5543:       nnsp_size = 0;
5544:       nnsp_has_cnst = PETSC_TRUE;
5545:     }
5546:     /* get max number of constraints on a single cc */
5547:     max_constraints = nnsp_size;
5548:     if (nnsp_has_cnst) max_constraints++;

5550:     /*
5551:          Evaluate maximum storage size needed by the procedure
5552:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5553:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5554:          There can be multiple constraints per connected component
5555:                                                                                                                                                            */
5556:     n_vertices = 0;
5557:     if (ISForVertices) {
5558:       ISGetSize(ISForVertices,&n_vertices);
5559:     }
5560:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5561:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

5563:     total_counts = n_ISForFaces+n_ISForEdges;
5564:     total_counts *= max_constraints;
5565:     total_counts += n_vertices;
5566:     PetscBTCreate(total_counts,&change_basis);

5568:     total_counts = 0;
5569:     max_size_of_constraint = 0;
5570:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5571:       IS used_is;
5572:       if (i<n_ISForEdges) {
5573:         used_is = ISForEdges[i];
5574:       } else {
5575:         used_is = ISForFaces[i-n_ISForEdges];
5576:       }
5577:       ISGetSize(used_is,&j);
5578:       total_counts += j;
5579:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5580:     }
5581:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

5583:     /* get local part of global near null space vectors */
5584:     PetscMalloc1(nnsp_size,&localnearnullsp);
5585:     for (k=0;k<nnsp_size;k++) {
5586:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5587:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5588:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5589:     }

5591:     /* whether or not to skip lapack calls */
5592:     skip_lapack = PETSC_TRUE;
5593:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5595:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5596:     if (!skip_lapack) {
5597:       PetscScalar temp_work;

5599: #if defined(PETSC_MISSING_LAPACK_GESVD)
5600:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5601:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5602:       PetscMalloc1(max_constraints,&singular_vals);
5603:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5604: #if defined(PETSC_USE_COMPLEX)
5605:       PetscMalloc1(3*max_constraints,&rwork);
5606: #endif
5607:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5608:       PetscBLASIntCast(max_constraints,&Blas_N);
5609:       PetscBLASIntCast(max_constraints,&Blas_LDA);
5610:       lwork = -1;
5611:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5612: #if !defined(PETSC_USE_COMPLEX)
5613:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5614: #else
5615:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5616: #endif
5617:       PetscFPTrapPop();
5618:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5619: #else /* on missing GESVD */
5620:       /* SVD */
5621:       PetscInt max_n,min_n;
5622:       max_n = max_size_of_constraint;
5623:       min_n = max_constraints;
5624:       if (max_size_of_constraint < max_constraints) {
5625:         min_n = max_size_of_constraint;
5626:         max_n = max_constraints;
5627:       }
5628:       PetscMalloc1(min_n,&singular_vals);
5629: #if defined(PETSC_USE_COMPLEX)
5630:       PetscMalloc1(5*min_n,&rwork);
5631: #endif
5632:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5633:       lwork = -1;
5634:       PetscBLASIntCast(max_n,&Blas_M);
5635:       PetscBLASIntCast(min_n,&Blas_N);
5636:       PetscBLASIntCast(max_n,&Blas_LDA);
5637:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5638: #if !defined(PETSC_USE_COMPLEX)
5639:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5640: #else
5641:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5642: #endif
5643:       PetscFPTrapPop();
5644:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5645: #endif /* on missing GESVD */
5646:       /* Allocate optimal workspace */
5647:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5648:       PetscMalloc1(lwork,&work);
5649:     }
5650:     /* Now we can loop on constraining sets */
5651:     total_counts = 0;
5652:     constraints_idxs_ptr[0] = 0;
5653:     constraints_data_ptr[0] = 0;
5654:     /* vertices */
5655:     if (n_vertices) {
5656:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5657:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5658:       for (i=0;i<n_vertices;i++) {
5659:         constraints_n[total_counts] = 1;
5660:         constraints_data[total_counts] = 1.0;
5661:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5662:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5663:         total_counts++;
5664:       }
5665:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5666:       n_vertices = total_counts;
5667:     }

5669:     /* edges and faces */
5670:     total_counts_cc = total_counts;
5671:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
5672:       IS        used_is;
5673:       PetscBool idxs_copied = PETSC_FALSE;

5675:       if (ncc<n_ISForEdges) {
5676:         used_is = ISForEdges[ncc];
5677:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
5678:       } else {
5679:         used_is = ISForFaces[ncc-n_ISForEdges];
5680:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
5681:       }
5682:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

5684:       ISGetSize(used_is,&size_of_constraint);
5685:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
5686:       /* change of basis should not be performed on local periodic nodes */
5687:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
5688:       if (nnsp_has_cnst) {
5689:         PetscScalar quad_value;

5691:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5692:         idxs_copied = PETSC_TRUE;

5694:         if (!pcbddc->use_nnsp_true) {
5695:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
5696:         } else {
5697:           quad_value = 1.0;
5698:         }
5699:         for (j=0;j<size_of_constraint;j++) {
5700:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
5701:         }
5702:         temp_constraints++;
5703:         total_counts++;
5704:       }
5705:       for (k=0;k<nnsp_size;k++) {
5706:         PetscReal real_value;
5707:         PetscScalar *ptr_to_data;

5709:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5710:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
5711:         for (j=0;j<size_of_constraint;j++) {
5712:           ptr_to_data[j] = array[is_indices[j]];
5713:         }
5714:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
5715:         /* check if array is null on the connected component */
5716:         PetscBLASIntCast(size_of_constraint,&Blas_N);
5717:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
5718:         if (real_value > 0.0) { /* keep indices and values */
5719:           temp_constraints++;
5720:           total_counts++;
5721:           if (!idxs_copied) {
5722:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
5723:             idxs_copied = PETSC_TRUE;
5724:           }
5725:         }
5726:       }
5727:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
5728:       valid_constraints = temp_constraints;
5729:       if (!pcbddc->use_nnsp_true && temp_constraints) {
5730:         if (temp_constraints == 1) { /* just normalize the constraint */
5731:           PetscScalar norm,*ptr_to_data;

5733:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
5734:           PetscBLASIntCast(size_of_constraint,&Blas_N);
5735:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
5736:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
5737:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
5738:         } else { /* perform SVD */
5739:           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
5740:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

5742: #if defined(PETSC_MISSING_LAPACK_GESVD)
5743:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
5744:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
5745:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
5746:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
5747:                 from that computed using LAPACKgesvd
5748:              -> This is due to a different computation of eigenvectors in LAPACKheev
5749:              -> The quality of the POD-computed basis will be the same */
5750:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
5751:           /* Store upper triangular part of correlation matrix */
5752:           PetscBLASIntCast(size_of_constraint,&Blas_N);
5753:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5754:           for (j=0;j<temp_constraints;j++) {
5755:             for (k=0;k<j+1;k++) {
5756:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
5757:             }
5758:           }
5759:           /* compute eigenvalues and eigenvectors of correlation matrix */
5760:           PetscBLASIntCast(temp_constraints,&Blas_N);
5761:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
5762: #if !defined(PETSC_USE_COMPLEX)
5763:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
5764: #else
5765:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
5766: #endif
5767:           PetscFPTrapPop();
5768:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
5769:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
5770:           j = 0;
5771:           while (j < temp_constraints && singular_vals[j] < tol) j++;
5772:           total_counts = total_counts-j;
5773:           valid_constraints = temp_constraints-j;
5774:           /* scale and copy POD basis into used quadrature memory */
5775:           PetscBLASIntCast(size_of_constraint,&Blas_M);
5776:           PetscBLASIntCast(temp_constraints,&Blas_N);
5777:           PetscBLASIntCast(temp_constraints,&Blas_K);
5778:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5779:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
5780:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
5781:           if (j<temp_constraints) {
5782:             PetscInt ii;
5783:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
5784:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5785:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
5786:             PetscFPTrapPop();
5787:             for (k=0;k<temp_constraints-j;k++) {
5788:               for (ii=0;ii<size_of_constraint;ii++) {
5789:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
5790:               }
5791:             }
5792:           }
5793: #else  /* on missing GESVD */
5794:           PetscBLASIntCast(size_of_constraint,&Blas_M);
5795:           PetscBLASIntCast(temp_constraints,&Blas_N);
5796:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
5797:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5798: #if !defined(PETSC_USE_COMPLEX)
5799:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
5800: #else
5801:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
5802: #endif
5803:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
5804:           PetscFPTrapPop();
5805:           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
5806:           k = temp_constraints;
5807:           if (k > size_of_constraint) k = size_of_constraint;
5808:           j = 0;
5809:           while (j < k && singular_vals[k-j-1] < tol) j++;
5810:           valid_constraints = k-j;
5811:           total_counts = total_counts-temp_constraints+valid_constraints;
5812: #endif /* on missing GESVD */
5813:         }
5814:       }
5815:       /* update pointers information */
5816:       if (valid_constraints) {
5817:         constraints_n[total_counts_cc] = valid_constraints;
5818:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
5819:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
5820:         /* set change_of_basis flag */
5821:         if (boolforchange) {
5822:           PetscBTSet(change_basis,total_counts_cc);
5823:         }
5824:         total_counts_cc++;
5825:       }
5826:     }
5827:     /* free workspace */
5828:     if (!skip_lapack) {
5829:       PetscFree(work);
5830: #if defined(PETSC_USE_COMPLEX)
5831:       PetscFree(rwork);
5832: #endif
5833:       PetscFree(singular_vals);
5834: #if defined(PETSC_MISSING_LAPACK_GESVD)
5835:       PetscFree(correlation_mat);
5836:       PetscFree(temp_basis);
5837: #endif
5838:     }
5839:     for (k=0;k<nnsp_size;k++) {
5840:       VecDestroy(&localnearnullsp[k]);
5841:     }
5842:     PetscFree(localnearnullsp);
5843:     /* free index sets of faces, edges and vertices */
5844:     for (i=0;i<n_ISForFaces;i++) {
5845:       ISDestroy(&ISForFaces[i]);
5846:     }
5847:     if (n_ISForFaces) {
5848:       PetscFree(ISForFaces);
5849:     }
5850:     for (i=0;i<n_ISForEdges;i++) {
5851:       ISDestroy(&ISForEdges[i]);
5852:     }
5853:     if (n_ISForEdges) {
5854:       PetscFree(ISForEdges);
5855:     }
5856:     ISDestroy(&ISForVertices);
5857:   } else {
5858:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

5860:     total_counts = 0;
5861:     n_vertices = 0;
5862:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
5863:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
5864:     }
5865:     max_constraints = 0;
5866:     total_counts_cc = 0;
5867:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5868:       total_counts += pcbddc->adaptive_constraints_n[i];
5869:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
5870:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
5871:     }
5872:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
5873:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
5874:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
5875:     constraints_data = pcbddc->adaptive_constraints_data;
5876:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
5877:     PetscMalloc1(total_counts_cc,&constraints_n);
5878:     total_counts_cc = 0;
5879:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
5880:       if (pcbddc->adaptive_constraints_n[i]) {
5881:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
5882:       }
5883:     }
5884: #if 0
5885:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
5886:     for (i=0;i<total_counts_cc;i++) {
5887:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
5888:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
5889:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
5890:         printf(" %d",constraints_idxs[j]);
5891:       }
5892:       printf("\n");
5893:       printf("number of cc: %d\n",constraints_n[i]);
5894:     }
5895:     for (i=0;i<n_vertices;i++) {
5896:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
5897:     }
5898:     for (i=0;i<sub_schurs->n_subs;i++) {
5899:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
5900:     }
5901: #endif

5903:     max_size_of_constraint = 0;
5904:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
5905:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
5906:     /* Change of basis */
5907:     PetscBTCreate(total_counts_cc,&change_basis);
5908:     if (pcbddc->use_change_of_basis) {
5909:       for (i=0;i<sub_schurs->n_subs;i++) {
5910:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
5911:           PetscBTSet(change_basis,i+n_vertices);
5912:         }
5913:       }
5914:     }
5915:   }
5916:   pcbddc->local_primal_size = total_counts;
5917:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

5919:   /* map constraints_idxs in boundary numbering */
5920:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
5921:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);

5923:   /* Create constraint matrix */
5924:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
5925:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
5926:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

5928:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
5929:   /* determine if a QR strategy is needed for change of basis */
5930:   qr_needed = PETSC_FALSE;
5931:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
5932:   total_primal_vertices=0;
5933:   pcbddc->local_primal_size_cc = 0;
5934:   for (i=0;i<total_counts_cc;i++) {
5935:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5936:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
5937:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
5938:       pcbddc->local_primal_size_cc += 1;
5939:     } else if (PetscBTLookup(change_basis,i)) {
5940:       for (k=0;k<constraints_n[i];k++) {
5941:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5942:       }
5943:       pcbddc->local_primal_size_cc += constraints_n[i];
5944:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
5945:         PetscBTSet(qr_needed_idx,i);
5946:         qr_needed = PETSC_TRUE;
5947:       }
5948:     } else {
5949:       pcbddc->local_primal_size_cc += 1;
5950:     }
5951:   }
5952:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
5953:   pcbddc->n_vertices = total_primal_vertices;
5954:   /* permute indices in order to have a sorted set of vertices */
5955:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
5956:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
5957:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
5958:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

5960:   /* nonzero structure of constraint matrix */
5961:   /* and get reference dof for local constraints */
5962:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
5963:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

5965:   j = total_primal_vertices;
5966:   total_counts = total_primal_vertices;
5967:   cum = total_primal_vertices;
5968:   for (i=n_vertices;i<total_counts_cc;i++) {
5969:     if (!PetscBTLookup(change_basis,i)) {
5970:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
5971:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
5972:       cum++;
5973:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5974:       for (k=0;k<constraints_n[i];k++) {
5975:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
5976:         nnz[j+k] = size_of_constraint;
5977:       }
5978:       j += constraints_n[i];
5979:     }
5980:   }
5981:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
5982:   PetscFree(nnz);

5984:   /* set values in constraint matrix */
5985:   for (i=0;i<total_primal_vertices;i++) {
5986:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
5987:   }
5988:   total_counts = total_primal_vertices;
5989:   for (i=n_vertices;i<total_counts_cc;i++) {
5990:     if (!PetscBTLookup(change_basis,i)) {
5991:       PetscInt *cols;

5993:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
5994:       cols = constraints_idxs+constraints_idxs_ptr[i];
5995:       for (k=0;k<constraints_n[i];k++) {
5996:         PetscInt    row = total_counts+k;
5997:         PetscScalar *vals;

5999:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6000:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6001:       }
6002:       total_counts += constraints_n[i];
6003:     }
6004:   }
6005:   /* assembling */
6006:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6007:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);

6009:   /*
6010:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6011:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6012:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6013:   */
6014:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6015:   if (pcbddc->use_change_of_basis) {
6016:     /* dual and primal dofs on a single cc */
6017:     PetscInt     dual_dofs,primal_dofs;
6018:     /* working stuff for GEQRF */
6019:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6020:     PetscBLASInt lqr_work;
6021:     /* working stuff for UNGQR */
6022:     PetscScalar  *gqr_work,lgqr_work_t;
6023:     PetscBLASInt lgqr_work;
6024:     /* working stuff for TRTRS */
6025:     PetscScalar  *trs_rhs;
6026:     PetscBLASInt Blas_NRHS;
6027:     /* pointers for values insertion into change of basis matrix */
6028:     PetscInt     *start_rows,*start_cols;
6029:     PetscScalar  *start_vals;
6030:     /* working stuff for values insertion */
6031:     PetscBT      is_primal;
6032:     PetscInt     *aux_primal_numbering_B;
6033:     /* matrix sizes */
6034:     PetscInt     global_size,local_size;
6035:     /* temporary change of basis */
6036:     Mat          localChangeOfBasisMatrix;
6037:     /* extra space for debugging */
6038:     PetscScalar  *dbg_work;

6040:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6041:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6042:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6043:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6044:     /* nonzeros for local mat */
6045:     PetscMalloc1(pcis->n,&nnz);
6046:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6047:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6048:     } else {
6049:       const PetscInt *ii;
6050:       PetscInt       n;
6051:       PetscBool      flg_row;
6052:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6053:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6054:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6055:     }
6056:     for (i=n_vertices;i<total_counts_cc;i++) {
6057:       if (PetscBTLookup(change_basis,i)) {
6058:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6059:         if (PetscBTLookup(qr_needed_idx,i)) {
6060:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6061:         } else {
6062:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6063:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6064:         }
6065:       }
6066:     }
6067:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6068:     PetscFree(nnz);
6069:     /* Set interior change in the matrix */
6070:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6071:       for (i=0;i<pcis->n;i++) {
6072:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6073:       }
6074:     } else {
6075:       const PetscInt *ii,*jj;
6076:       PetscScalar    *aa;
6077:       PetscInt       n;
6078:       PetscBool      flg_row;
6079:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6080:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6081:       for (i=0;i<n;i++) {
6082:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6083:       }
6084:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6085:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6086:     }

6088:     if (pcbddc->dbg_flag) {
6089:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6090:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6091:     }


6094:     /* Now we loop on the constraints which need a change of basis */
6095:     /*
6096:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6097:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6099:        Basic blocks of change of basis matrix T computed by

6101:           - Using the following block transformation if there is exactly one primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6103:             | 1        0   ...        0         s_1/S |
6104:             | 0        1   ...        0         s_2/S |
6105:             |              ...                        |
6106:             | 0        ...            1     s_{n-1}/S |
6107:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6109:             with S = \sum_{i=1}^n s_i^2
6110:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6111:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6113:           - QR decomposition of constraints otherwise
6114:     */
6115:     if (qr_needed) {
6116:       /* space to store Q */
6117:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6118:       /* array to store scaling factors for reflectors */
6119:       PetscMalloc1(max_constraints,&qr_tau);
6120:       /* first we issue queries for optimal work */
6121:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6122:       PetscBLASIntCast(max_constraints,&Blas_N);
6123:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6124:       lqr_work = -1;
6125:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6126:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6127:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6128:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6129:       lgqr_work = -1;
6130:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6131:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6132:       PetscBLASIntCast(max_constraints,&Blas_K);
6133:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6134:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6135:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6136:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
6137:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6138:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6139:       /* array to store rhs and solution of triangular solver */
6140:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6141:       /* allocating workspace for check */
6142:       if (pcbddc->dbg_flag) {
6143:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6144:       }
6145:     }
6146:     /* array to store whether a node is primal or not */
6147:     PetscBTCreate(pcis->n_B,&is_primal);
6148:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6149:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6150:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6151:     for (i=0;i<total_primal_vertices;i++) {
6152:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6153:     }
6154:     PetscFree(aux_primal_numbering_B);

6156:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6157:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6158:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6159:       if (PetscBTLookup(change_basis,total_counts)) {
6160:         /* get constraint info */
6161:         primal_dofs = constraints_n[total_counts];
6162:         dual_dofs = size_of_constraint-primal_dofs;

6164:         if (pcbddc->dbg_flag) {
6165:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6166:         }

6168:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6170:           /* copy quadrature constraints for change of basis check */
6171:           if (pcbddc->dbg_flag) {
6172:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6173:           }
6174:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6175:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6177:           /* compute QR decomposition of constraints */
6178:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6179:           PetscBLASIntCast(primal_dofs,&Blas_N);
6180:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6181:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6182:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6183:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6184:           PetscFPTrapPop();

6186:           /* explicitly compute R^-T */
6187:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6188:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6189:           PetscBLASIntCast(primal_dofs,&Blas_N);
6190:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6191:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6192:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6193:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6194:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6195:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6196:           PetscFPTrapPop();

6198:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6199:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6200:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6201:           PetscBLASIntCast(primal_dofs,&Blas_K);
6202:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6203:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6204:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6205:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
6206:           PetscFPTrapPop();

6208:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6209:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6210:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6211:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6212:           PetscBLASIntCast(primal_dofs,&Blas_N);
6213:           PetscBLASIntCast(primal_dofs,&Blas_K);
6214:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6215:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6216:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6217:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6218:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6219:           PetscFPTrapPop();
6220:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6222:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6223:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6224:           /* insert cols for primal dofs */
6225:           for (j=0;j<primal_dofs;j++) {
6226:             start_vals = &qr_basis[j*size_of_constraint];
6227:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6228:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6229:           }
6230:           /* insert cols for dual dofs */
6231:           for (j=0,k=0;j<dual_dofs;k++) {
6232:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6233:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6234:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6235:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6236:               j++;
6237:             }
6238:           }

6240:           /* check change of basis */
6241:           if (pcbddc->dbg_flag) {
6242:             PetscInt   ii,jj;
6243:             PetscBool valid_qr=PETSC_TRUE;
6244:             PetscBLASIntCast(primal_dofs,&Blas_M);
6245:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6246:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6247:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6248:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6249:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6250:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6251:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6252:             PetscFPTrapPop();
6253:             for (jj=0;jj<size_of_constraint;jj++) {
6254:               for (ii=0;ii<primal_dofs;ii++) {
6255:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6256:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
6257:               }
6258:             }
6259:             if (!valid_qr) {
6260:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6261:               for (jj=0;jj<size_of_constraint;jj++) {
6262:                 for (ii=0;ii<primal_dofs;ii++) {
6263:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6264:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6265:                   }
6266:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
6267:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6268:                   }
6269:                 }
6270:               }
6271:             } else {
6272:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6273:             }
6274:           }
6275:         } else { /* simple transformation block */
6276:           PetscInt    row,col;
6277:           PetscScalar val,norm;

6279:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6280:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6281:           for (j=0;j<size_of_constraint;j++) {
6282:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6283:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6284:             if (!PetscBTLookup(is_primal,row_B)) {
6285:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6286:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6287:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6288:             } else {
6289:               for (k=0;k<size_of_constraint;k++) {
6290:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6291:                 if (row != col) {
6292:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6293:                 } else {
6294:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6295:                 }
6296:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6297:               }
6298:             }
6299:           }
6300:           if (pcbddc->dbg_flag) {
6301:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6302:           }
6303:         }
6304:       } else {
6305:         if (pcbddc->dbg_flag) {
6306:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6307:         }
6308:       }
6309:     }

6311:     /* free workspace */
6312:     if (qr_needed) {
6313:       if (pcbddc->dbg_flag) {
6314:         PetscFree(dbg_work);
6315:       }
6316:       PetscFree(trs_rhs);
6317:       PetscFree(qr_tau);
6318:       PetscFree(qr_work);
6319:       PetscFree(gqr_work);
6320:       PetscFree(qr_basis);
6321:     }
6322:     PetscBTDestroy(&is_primal);
6323:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6324:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6326:     /* assembling of global change of variable */
6327:     if (!pcbddc->fake_change) {
6328:       Mat      tmat;
6329:       PetscInt bs;

6331:       VecGetSize(pcis->vec1_global,&global_size);
6332:       VecGetLocalSize(pcis->vec1_global,&local_size);
6333:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6334:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6335:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6336:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6337:       MatGetBlockSize(pc->pmat,&bs);
6338:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6339:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6340:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6341:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6342:       MatDestroy(&tmat);
6343:       VecSet(pcis->vec1_global,0.0);
6344:       VecSet(pcis->vec1_N,1.0);
6345:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6346:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6347:       VecReciprocal(pcis->vec1_global);
6348:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6350:       /* check */
6351:       if (pcbddc->dbg_flag) {
6352:         PetscReal error;
6353:         Vec       x,x_change;

6355:         VecDuplicate(pcis->vec1_global,&x);
6356:         VecDuplicate(pcis->vec1_global,&x_change);
6357:         VecSetRandom(x,NULL);
6358:         VecCopy(x,pcis->vec1_global);
6359:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6360:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6361:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6362:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6363:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6364:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6365:         VecAXPY(x,-1.0,x_change);
6366:         VecNorm(x,NORM_INFINITY,&error);
6367:         if (error > PETSC_SMALL) {
6368:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6369:         }
6370:         VecDestroy(&x);
6371:         VecDestroy(&x_change);
6372:       }
6373:       /* adapt sub_schurs computed (if any) */
6374:       if (pcbddc->use_deluxe_scaling) {
6375:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6377:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6378:         if (sub_schurs && sub_schurs->S_Ej_all) {
6379:           Mat                    S_new,tmat;
6380:           IS                     is_all_N,is_V_Sall = NULL;

6382:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6383:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6384:           if (pcbddc->deluxe_zerorows) {
6385:             ISLocalToGlobalMapping NtoSall;
6386:             IS                     is_V;
6387:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6388:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6389:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6390:             ISLocalToGlobalMappingDestroy(&NtoSall);
6391:             ISDestroy(&is_V);
6392:           }
6393:           ISDestroy(&is_all_N);
6394:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6395:           MatDestroy(&sub_schurs->S_Ej_all);
6396:           PetscObjectReference((PetscObject)S_new);
6397:           if (pcbddc->deluxe_zerorows) {
6398:             const PetscScalar *array;
6399:             const PetscInt    *idxs_V,*idxs_all;
6400:             PetscInt          i,n_V;

6402:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6403:             ISGetLocalSize(is_V_Sall,&n_V);
6404:             ISGetIndices(is_V_Sall,&idxs_V);
6405:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6406:             VecGetArrayRead(pcis->D,&array);
6407:             for (i=0;i<n_V;i++) {
6408:               PetscScalar val;
6409:               PetscInt    idx;

6411:               idx = idxs_V[i];
6412:               val = array[idxs_all[idxs_V[i]]];
6413:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6414:             }
6415:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6416:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6417:             VecRestoreArrayRead(pcis->D,&array);
6418:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6419:             ISRestoreIndices(is_V_Sall,&idxs_V);
6420:           }
6421:           sub_schurs->S_Ej_all = S_new;
6422:           MatDestroy(&S_new);
6423:           if (sub_schurs->sum_S_Ej_all) {
6424:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6425:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6426:             PetscObjectReference((PetscObject)S_new);
6427:             if (pcbddc->deluxe_zerorows) {
6428:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6429:             }
6430:             sub_schurs->sum_S_Ej_all = S_new;
6431:             MatDestroy(&S_new);
6432:           }
6433:           ISDestroy(&is_V_Sall);
6434:           MatDestroy(&tmat);
6435:         }
6436:         /* destroy any change of basis context in sub_schurs */
6437:         if (sub_schurs && sub_schurs->change) {
6438:           PetscInt i;

6440:           for (i=0;i<sub_schurs->n_subs;i++) {
6441:             KSPDestroy(&sub_schurs->change[i]);
6442:           }
6443:           PetscFree(sub_schurs->change);
6444:         }
6445:       }
6446:       if (pcbddc->switch_static) { /* need to save the local change */
6447:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6448:       } else {
6449:         MatDestroy(&localChangeOfBasisMatrix);
6450:       }
6451:       /* determine if any process has changed the pressures locally */
6452:       pcbddc->change_interior = pcbddc->benign_have_null;
6453:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6454:       MatDestroy(&pcbddc->ConstraintMatrix);
6455:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6456:       pcbddc->use_qr_single = qr_needed;
6457:     }
6458:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6459:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6460:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6461:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6462:     } else {
6463:       Mat benign_global = NULL;
6464:       if (pcbddc->benign_have_null) {
6465:         Mat tmat;

6467:         pcbddc->change_interior = PETSC_TRUE;
6468:         VecSet(pcis->vec1_global,0.0);
6469:         VecSet(pcis->vec1_N,1.0);
6470:         VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6471:         VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6472:         VecReciprocal(pcis->vec1_global);
6473:         VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6474:         VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6475:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6476:         if (pcbddc->benign_change) {
6477:           Mat M;

6479:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6480:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6481:           MatISSetLocalMat(tmat,M);
6482:           MatDestroy(&M);
6483:         } else {
6484:           Mat         eye;
6485:           PetscScalar *array;

6487:           VecGetArray(pcis->vec1_N,&array);
6488:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6489:           for (i=0;i<pcis->n;i++) {
6490:             MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6491:           }
6492:           VecRestoreArray(pcis->vec1_N,&array);
6493:           MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6494:           MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6495:           MatISSetLocalMat(tmat,eye);
6496:           MatDestroy(&eye);
6497:         }
6498:         MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6499:         MatDestroy(&tmat);
6500:       }
6501:       if (pcbddc->user_ChangeOfBasisMatrix) {
6502:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6503:         MatDestroy(&benign_global);
6504:       } else if (pcbddc->benign_have_null) {
6505:         pcbddc->ChangeOfBasisMatrix = benign_global;
6506:       }
6507:     }
6508:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6509:       IS             is_global;
6510:       const PetscInt *gidxs;

6512:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6513:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6514:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6515:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6516:       ISDestroy(&is_global);
6517:     }
6518:   }
6519:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6520:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6521:   }

6523:   if (!pcbddc->fake_change) {
6524:     /* add pressure dofs to set of primal nodes for numbering purposes */
6525:     for (i=0;i<pcbddc->benign_n;i++) {
6526:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6527:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6528:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6529:       pcbddc->local_primal_size_cc++;
6530:       pcbddc->local_primal_size++;
6531:     }

6533:     /* check if a new primal space has been introduced (also take into account benign trick) */
6534:     pcbddc->new_primal_space_local = PETSC_TRUE;
6535:     if (olocal_primal_size == pcbddc->local_primal_size) {
6536:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6537:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6538:       if (!pcbddc->new_primal_space_local) {
6539:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6540:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6541:       }
6542:     }
6543:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6544:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6545:   }
6546:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

6548:   /* flush dbg viewer */
6549:   if (pcbddc->dbg_flag) {
6550:     PetscViewerFlush(pcbddc->dbg_viewer);
6551:   }

6553:   /* free workspace */
6554:   PetscBTDestroy(&qr_needed_idx);
6555:   PetscBTDestroy(&change_basis);
6556:   if (!pcbddc->adaptive_selection) {
6557:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6558:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6559:   } else {
6560:     PetscFree5(pcbddc->adaptive_constraints_n,
6561:                       pcbddc->adaptive_constraints_idxs_ptr,
6562:                       pcbddc->adaptive_constraints_data_ptr,
6563:                       pcbddc->adaptive_constraints_idxs,
6564:                       pcbddc->adaptive_constraints_data);
6565:     PetscFree(constraints_n);
6566:     PetscFree(constraints_idxs_B);
6567:   }
6568:   return(0);
6569: }

/*
   PCBDDCAnalyzeInterface - (Re)builds the local graph stored in pcbddc->mat_graph and runs the
   connected components analysis on it.

   Input Parameter:
.  pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Notes:
   The graph is reset and set up again only when pcbddc->recompute_topography is set.
   The connected components are computed whenever pcbddc->graphanalyzed is false, and the flag
   is set to true afterwards.
   Returns 0 on the path shown here; a user CSR graph whose size differs from the graph size
   raises PETSC_ERR_ARG_WRONG.
*/
6571: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6572: {
6573:   ISLocalToGlobalMapping map;
6574:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
6575:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
6576:   PetscInt               i,N;
        /* rcsr records that the block extracting the default adjacency from matis->A was entered */
6577:   PetscBool              rcsr = PETSC_FALSE;
6578:   PetscErrorCode         ierr;

6581:   if (pcbddc->recompute_topography) {
6582:     pcbddc->graphanalyzed = PETSC_FALSE;
6583:     /* Reset previously computed graph */
6584:     PCBDDCGraphReset(pcbddc->mat_graph);
6585:     /* Init local Graph struct */
          /* N is the number of rows returned by MatGetSize on pc->pmat; map is its row local-to-global mapping */
6586:     MatGetSize(pc->pmat,&N,NULL);
6587:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6588:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* the consistency check is applied only to the local IS, and only when no pcbddc->user_primal_vertices is set */
6590:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6591:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6592:     }
6593:     /* Check validity of the csr graph passed in by the user */
          /* NOTE(review): the message uses %d while other messages in this file print PetscInt with %D;
             the types of nvtxs_csr and nvtxs are not visible here - confirm and align if they are PetscInt */
6594:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

6596:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6597:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6598:       PetscInt  *xadj,*adjncy;
6599:       PetscInt  nvtxs;
6600:       PetscBool flg_row=PETSC_FALSE;

            /* the row structure of the local matrix is copied into the graph only if MatGetRowIJ sets flg_row */
6602:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6603:       if (flg_row) {
6604:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6605:         pcbddc->computed_rowadj = PETSC_TRUE;
6606:       }
6607:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* set regardless of flg_row */
6608:       rcsr = PETSC_TRUE;
6609:     }
6610:     if (pcbddc->dbg_flag) {
6611:       PetscViewerFlush(pcbddc->dbg_viewer);
6612:     }

6614:     /* Setup of Graph */
6615:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6616:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

6618:     /* attach info on disconnected subdomains if present */
6619:     if (pcbddc->n_local_subs) {
6620:       PetscInt *local_subs;

            /* local_subs[k] = index i of the IS pcbddc->local_subs[i] that lists k.
               NOTE(review): the array is sized with N from MatGetSize above and is not initialized here,
               so entries not listed in any IS keep whatever PetscMalloc1 returned - confirm the ISs cover every index the graph reads */
6622:       PetscMalloc1(N,&local_subs);
6623:       for (i=0;i<pcbddc->n_local_subs;i++) {
6624:         const PetscInt *idxs;
6625:         PetscInt       nl,j;

6627:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
6628:         ISGetIndices(pcbddc->local_subs[i],&idxs);
6629:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6630:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6631:       }
            /* the array is stored in the graph and not freed in this function */
6632:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6633:       pcbddc->mat_graph->local_subs = local_subs;
6634:     }
6635:   }

6637:   if (!pcbddc->graphanalyzed) {
6638:     /* Graph's connected components analysis */
6639:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6640:     pcbddc->graphanalyzed = PETSC_TRUE;
6641:   }
        /* presumably clears the CSR size so that the validity check above treats the automatically
           extracted adjacency as "not provided by the user" on a later call - TODO confirm */
6642:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6643:   return(0);
6644: }

/*
   PCBDDCOrthonormalizeVecs - Orthonormalizes a set of vectors in place with a Gram-Schmidt sweep.

   Input Parameter:
.  n - number of vectors in vecs

   Input/Output Parameter:
.  vecs - the vectors; on output vecs[i] has been normalized and the component along vecs[i]
          has been removed from every vecs[k] with k > i

   Notes:
   For each i the vector is normalized first, then the n-i-1 dot products with the remaining
   vectors are computed in one VecMDot call and the projections are subtracted one vector at a time.
   No check is made for (near) linearly dependent input.
*/
6646: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
6647: {
6648:   PetscInt       i,j;
6649:   PetscScalar    *alphas;

6653:   PetscMalloc1(n,&alphas);
6654:   for (i=0;i<n;i++) {
6655:     VecNormalize(vecs[i],NULL);
          /* alphas[j] = vecs[i] . vecs[i+1+j], i.e. vecs[i+1+j]^H vecs[i] */
6656:     VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);
          /* coefficient of the projection of vecs[i+1+j] on vecs[i], with the sign needed to subtract it */
6657:     for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]);
          /* update every remaining vector; the previous VecMAXPY call used the stale loop index j
             (equal to n-i-1 at this point) and modified a single vector with a combination of vecs[i..] */
6658:     for (j=0;j<n-i-1;j++) VecAXPY(vecs[i+1+j],alphas[j],vecs[i]);
6659:   }
6660:   PetscFree(alphas);
6661:   return(0);
6662: }

/*
   PCBDDCMatISGetSubassemblingPattern - For a MATIS matrix, computes on each process the rank
   (in the communicator of mat) that the process is assigned to; PCBDDCMatISSubassemble uses the
   resulting IS as the list of destinations for the local matrices.

   Input Parameters:
+  mat      - the matrix; an error is raised if it is not of type MATIS
-  redprocs - when 0 < redprocs < number of active processes, the subdomain adjacency is stored
              in an AIJ matrix whose rows are owned by the first redprocs ranks ("aggregate" path);
              otherwise a MPIAdj matrix with one row per active process is used

   Input/Output Parameter:
.  n_subdomains - on input the requested number of subdomains (must be positive); it may be modified on output

   Output Parameters:
+  is_sends  - IS on the communicator of mat with one entry on active processes and no entries on the others
-  have_void - (optional, can be NULL) PETSC_TRUE if at least one process is not active

   Notes:
   A process is "active" when the local matrix returned by MatISGetLocalMat has a nonzero local size.
   The options -matis_partitioning_use_vwgt and -matis_partitioning_threshold are read from the
   options database.
*/
6664: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
6665: {
6666:   Mat            A;
6667:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
6668:   PetscMPIInt    size,rank,color;
6669:   PetscInt       *xadj,*adjncy;
6670:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
6671:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
6672:   PetscInt       void_procs,*procs_candidates = NULL;
6673:   PetscInt       xadj_count,*count;
6674:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
6675:   PetscSubcomm   psubcomm;
6676:   MPI_Comm       subcomm;

6681:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6682:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6685:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains);

6687:   if (have_void) *have_void = PETSC_FALSE;
6688:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
6689:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
6690:   MatISGetLocalMat(mat,&A);
6691:   MatGetLocalSize(A,&n,NULL);
        /* im_active is 1 when the local matrix has at least one row, 0 otherwise */
6692:   im_active = !!n;
6693:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
6694:   void_procs = size - active_procs;
6695:   /* get ranks of non-active processes in mat communicator */
6696:   if (void_procs) {
6697:     PetscInt ncand;

6699:     if (have_void) *have_void = PETSC_TRUE;
6700:     PetscMalloc1(size,&procs_candidates);
          /* gather the activity flags of all ranks, then compact in place: after the loop the first
             ncand entries hold the ranks of the non-active processes */
6701:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
6702:     for (i=0,ncand=0;i<size;i++) {
6703:       if (!procs_candidates[i]) {
6704:         procs_candidates[ncand++] = i;
6705:       }
6706:     }
6707:     /* force n_subdomains to be not greater than the number of non-active processes */
6708:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
6709:   }

6711:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
6712:      number of subdomains requested 1 -> send to master or first candidate in voids  */
6713:   MatGetSize(mat,&N,NULL);
6714:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
6715:     PetscInt issize,isidx,dest;
6716:     if (*n_subdomains == 1) dest = 0;
6717:     else dest = rank;
6718:     if (im_active) {
6719:       issize = 1;
6720:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
              /* NOTE(review): only the first void_procs entries of procs_candidates hold candidate ranks
                 after the compaction above; confirm dest cannot exceed that range when dest = rank */
6721:         isidx = procs_candidates[dest];
6722:       } else {
6723:         isidx = dest;
6724:       }
6725:     } else {
6726:       issize = 0;
6727:       isidx = -1;
6728:     }
6729:     if (*n_subdomains != 1) *n_subdomains = active_procs;
6730:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
6731:     PetscFree(procs_candidates);
6732:     return(0);
6733:   }
6734:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
6735:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
        /* threshold values below 2 are raised to 2 */
6736:   threshold = PetscMax(threshold,2);

6738:   /* Get info on mapping */
6739:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

6741:   /* build local CSR graph of subdomains' connectivity */
        /* a single-row CSR: xadj has two entries, adjncy/adjncy_wgt are sized for at most n_neighs-1 edges */
6742:   PetscMalloc1(2,&xadj);
6743:   xadj[0] = 0;
6744:   xadj[1] = PetscMax(n_neighs-1,0);
6745:   PetscMalloc1(xadj[1],&adjncy);
6746:   PetscMalloc1(xadj[1],&adjncy_wgt);
        /* count[k] = number of entries of the mapping info (entry 0 is skipped) that list local index k as shared */
6747:   PetscCalloc1(n,&count);
6748:   for (i=1;i<n_neighs;i++)
6749:     for (j=0;j<n_shared[i];j++)
6750:       count[shared[i][j]] += 1;

        /* entry i becomes an edge as soon as one of its shared indices has a count below threshold;
           the edge weight is the number of indices shared with it */
6752:   xadj_count = 0;
6753:   for (i=1;i<n_neighs;i++) {
6754:     for (j=0;j<n_shared[i];j++) {
6755:       if (count[shared[i][j]] < threshold) {
6756:         adjncy[xadj_count] = neighs[i];
6757:         adjncy_wgt[xadj_count] = n_shared[i];
6758:         xadj_count++;
6759:         break;
6760:       }
6761:     }
6762:   }
6763:   xadj[1] = xadj_count;
6764:   PetscFree(count);
6765:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
6766:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

        /* single-entry buffer for the result; it is handed to is_sends with PETSC_OWN_POINTER at the end */
6768:   PetscMalloc1(1,&ranks_send_to_idx);

6770:   /* Restrict work on active processes only */
        /* color is the activity flag: nonzero on active processes, zero on the others */
6771:   PetscMPIIntCast(im_active,&color);
6772:   if (void_procs) {
6773:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
6774:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
6775:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
6776:     subcomm = PetscSubcommChild(psubcomm);
6777:   } else {
6778:     psubcomm = NULL;
6779:     subcomm = PetscObjectComm((PetscObject)mat);
6780:   }

6782:   v_wgt = NULL;
6783:   if (!color) {
          /* non-active processes do not take part in the partitioning: release the graph arrays */
6784:     PetscFree(xadj);
6785:     PetscFree(adjncy);
6786:     PetscFree(adjncy_wgt);
6787:   } else {
6788:     Mat             subdomain_adj;
6789:     IS              new_ranks,new_ranks_contig;
6790:     MatPartitioning partitioner;
6791:     PetscInt        rstart=0,rend=0;
6792:     PetscInt        *is_indices,*oldranks;
          /* shadows the outer size: inside this branch size is the size of subcomm */
6793:     PetscMPIInt     size;
6794:     PetscBool       aggregate;

6796:     MPI_Comm_size(subcomm,&size);
6797:     if (void_procs) {
6798:       PetscInt prank = rank;
            /* oldranks[r] = rank in the communicator of mat of the process with rank r in subcomm;
               the neighbor ranks in adjncy are replaced by their position in oldranks */
6799:       PetscMalloc1(size,&oldranks);
6800:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
6801:       for (i=0;i<xadj[1];i++) {
6802:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
6803:       }
6804:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
6805:     } else {
6806:       oldranks = NULL;
6807:     }
6808:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
6809:     if (aggregate) { /* TODO: all this part could be made more efficient */
6810:       PetscInt    lrows,row,ncols,*cols;
6811:       PetscMPIInt nrank;
6812:       PetscScalar *vals;

6814:       MPI_Comm_rank(subcomm,&nrank);
            /* the size rows of the adjacency matrix are split among the first redprocs ranks of subcomm;
               the remaining ranks own no rows */
6815:       lrows = 0;
6816:       if (nrank<redprocs) {
6817:         lrows = size/redprocs;
6818:         if (nrank<size%redprocs) lrows++;
6819:       }
6820:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
6821:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
6822:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
6823:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
            /* each process inserts its own row (row index = its rank in subcomm) with the edge weights as values */
6824:       row = nrank;
6825:       ncols = xadj[1]-xadj[0];
6826:       cols = adjncy;
6827:       PetscMalloc1(ncols,&vals);
6828:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
6829:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
6830:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
6831:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
6832:       PetscFree(xadj);
6833:       PetscFree(adjncy);
6834:       PetscFree(adjncy_wgt);
6835:       PetscFree(vals);
6836:       if (use_vwgt) {
6837:         Vec               v;
6838:         const PetscScalar *array;
6839:         PetscInt          nl;

              /* vertex weights: every process writes its local size n at its own row of a vector created
                 from subdomain_adj; after assembly the local part of the vector is copied into v_wgt */
6841:         MatCreateVecs(subdomain_adj,&v,NULL);
6842:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
6843:         VecAssemblyBegin(v);
6844:         VecAssemblyEnd(v);
6845:         VecGetLocalSize(v,&nl);
6846:         VecGetArrayRead(v,&array);
6847:         PetscMalloc1(nl,&v_wgt);
6848:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
6849:         VecRestoreArrayRead(v,&array);
6850:         VecDestroy(&v);
6851:       }
6852:     } else {
            /* one row per process; xadj, adjncy and adjncy_wgt are passed to the matrix and not freed in this function */
6853:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
6854:       if (use_vwgt) {
6855:         PetscMalloc1(1,&v_wgt);
6856:         v_wgt[0] = n;
6857:       }
6858:     }
6859:     /* MatView(subdomain_adj,0); */

6861:     /* Partition */
6862:     MatPartitioningCreate(subcomm,&partitioner);
6863:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
          /* v_wgt is passed to the partitioner and not freed in this function */
6864:     if (v_wgt) {
6865:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
6866:     }
6867:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
6868:     MatPartitioningSetNParts(partitioner,*n_subdomains);
6869:     MatPartitioningSetFromOptions(partitioner);
6870:     MatPartitioningApply(partitioner,&new_ranks);
6871:     /* MatPartitioningView(partitioner,0); */

6873:     /* renumber new_ranks to avoid "holes" in new set of processors */
6874:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
6875:     ISDestroy(&new_ranks);
6876:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6877:     if (!aggregate) {
            /* one row per process: the first local entry of the renumbered partitioning is this process'
               part; it is mapped through oldranks and/or procs_candidates when those exist */
6878:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6879: #if defined(PETSC_USE_DEBUG)
6880:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6881: #endif
6882:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
6883:       } else if (oldranks) {
6884:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
6885:       } else {
6886:         ranks_send_to_idx[0] = is_indices[0];
6887:       }
6888:     } else {
6889:       PetscInt    idxs[1];
6890:       PetscMPIInt tag;
6891:       MPI_Request *reqs;

            /* the owner of rows [rstart,rend) sends the entry for row i to the process with rank i in subcomm;
               every process in this branch then receives one value from whichever rank sent it */
6893:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
6894:       PetscMalloc1(rend-rstart,&reqs);
6895:       for (i=rstart;i<rend;i++) {
6896:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
6897:       }
6898:       MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
6899:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
6900:       PetscFree(reqs);
6901:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6902: #if defined(PETSC_USE_DEBUG)
6903:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6904: #endif
6905:         ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]];
6906:       } else if (oldranks) {
6907:         ranks_send_to_idx[0] = oldranks[idxs[0]];
6908:       } else {
6909:         ranks_send_to_idx[0] = idxs[0];
6910:       }
6911:     }
6912:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
6913:     /* clean up */
6914:     PetscFree(oldranks);
6915:     ISDestroy(&new_ranks_contig);
6916:     MatDestroy(&subdomain_adj);
6917:     MatPartitioningDestroy(&partitioner);
6918:   }
6919:   PetscSubcommDestroy(&psubcomm);
6920:   PetscFree(procs_candidates);

6922:   /* assemble parallel IS for sends */
        /* one entry on active processes, none on the others; the IS takes ownership of ranks_send_to_idx */
6923:   i = 1;
6924:   if (!color) i=0;
6925:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
6926:   return(0);
6927: }

/* Tags for the storage format of a local matrix. PCBDDCMatISSubassemble writes one of them as the
   first entry of each index message it sends, and switches on the received values to choose the
   type of the new local matrix. */
6929: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

6931: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
6932: {
6933:   Mat                    local_mat;
6934:   IS                     is_sends_internal;
6935:   PetscInt               rows,cols,new_local_rows;
6936:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
6937:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
6938:   ISLocalToGlobalMapping l2gmap;
6939:   PetscInt*              l2gmap_indices;
6940:   const PetscInt*        is_indices;
6941:   MatType                new_local_type;
6942:   /* buffers */
6943:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
6944:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
6945:   PetscInt               *recv_buffer_idxs_local;
6946:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
6947:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
6948:   /* MPI */
6949:   MPI_Comm               comm,comm_n;
6950:   PetscSubcomm           subcomm;
6951:   PetscMPIInt            n_sends,n_recvs,commsize;
6952:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
6953:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
6954:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
6955:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
6956:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
6957:   PetscErrorCode         ierr;

6961:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6962:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6969:   if (nvecs) {
6970:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
6972:   }
6973:   /* further checks */
6974:   MatISGetLocalMat(mat,&local_mat);
6975:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
6976:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
6977:   MatGetSize(local_mat,&rows,&cols);
6978:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
6979:   if (reuse && *mat_n) {
6980:     PetscInt mrows,mcols,mnrows,mncols;
6982:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
6983:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
6984:     MatGetSize(mat,&mrows,&mcols);
6985:     MatGetSize(*mat_n,&mnrows,&mncols);
6986:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
6987:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
6988:   }
6989:   MatGetBlockSize(local_mat,&bs);

6992:   /* prepare IS for sending if not provided */
6993:   if (!is_sends) {
6994:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
6995:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
6996:   } else {
6997:     PetscObjectReference((PetscObject)is_sends);
6998:     is_sends_internal = is_sends;
6999:   }

7001:   /* get comm */
7002:   PetscObjectGetComm((PetscObject)mat,&comm);

7004:   /* compute number of sends */
7005:   ISGetLocalSize(is_sends_internal,&i);
7006:   PetscMPIIntCast(i,&n_sends);

7008:   /* compute number of receives */
7009:   MPI_Comm_size(comm,&commsize);
7010:   PetscMalloc1(commsize,&iflags);
7011:   PetscMemzero(iflags,commsize*sizeof(*iflags));
7012:   ISGetIndices(is_sends_internal,&is_indices);
7013:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7014:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7015:   PetscFree(iflags);

7017:   /* restrict comm if requested */
7018:   subcomm = 0;
7019:   destroy_mat = PETSC_FALSE;
7020:   if (restrict_comm) {
7021:     PetscMPIInt color,subcommsize;

7023:     color = 0;
7024:     if (restrict_full) {
7025:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7026:     } else {
7027:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7028:     }
7029:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7030:     subcommsize = commsize - subcommsize;
7031:     /* check if reuse has been requested */
7032:     if (reuse) {
7033:       if (*mat_n) {
7034:         PetscMPIInt subcommsize2;
7035:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7036:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7037:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7038:       } else {
7039:         comm_n = PETSC_COMM_SELF;
7040:       }
7041:     } else { /* MAT_INITIAL_MATRIX */
7042:       PetscMPIInt rank;

7044:       MPI_Comm_rank(comm,&rank);
7045:       PetscSubcommCreate(comm,&subcomm);
7046:       PetscSubcommSetNumber(subcomm,2);
7047:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7048:       comm_n = PetscSubcommChild(subcomm);
7049:     }
7050:     /* flag to destroy *mat_n if not significative */
7051:     if (color) destroy_mat = PETSC_TRUE;
7052:   } else {
7053:     comm_n = comm;
7054:   }

7056:   /* prepare send/receive buffers */
7057:   PetscMalloc1(commsize,&ilengths_idxs);
7058:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7059:   PetscMalloc1(commsize,&ilengths_vals);
7060:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7061:   if (nis) {
7062:     PetscCalloc1(commsize,&ilengths_idxs_is);
7063:   }

7065:   /* Get data from local matrices */
7066:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7067:     /* TODO: See below some guidelines on how to prepare the local buffers */
7068:     /*
7069:        send_buffer_vals should contain the raw values of the local matrix
7070:        send_buffer_idxs should contain:
7071:        - MatType_PRIVATE type
7072:        - PetscInt        size_of_l2gmap
7073:        - PetscInt        global_row_indices[size_of_l2gmap]
7074:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7075:     */
7076:   else {
7077:     MatDenseGetArray(local_mat,&send_buffer_vals);
7078:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7079:     PetscMalloc1(i+2,&send_buffer_idxs);
7080:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7081:     send_buffer_idxs[1] = i;
7082:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7083:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7084:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7085:     PetscMPIIntCast(i,&len);
7086:     for (i=0;i<n_sends;i++) {
7087:       ilengths_vals[is_indices[i]] = len*len;
7088:       ilengths_idxs[is_indices[i]] = len+2;
7089:     }
7090:   }
7091:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7092:   /* additional is (if any) */
7093:   if (nis) {
7094:     PetscMPIInt psum;
7095:     PetscInt j;
7096:     for (j=0,psum=0;j<nis;j++) {
7097:       PetscInt plen;
7098:       ISGetLocalSize(isarray[j],&plen);
7099:       PetscMPIIntCast(plen,&len);
7100:       psum += len+1; /* indices + lenght */
7101:     }
7102:     PetscMalloc1(psum,&send_buffer_idxs_is);
7103:     for (j=0,psum=0;j<nis;j++) {
7104:       PetscInt plen;
7105:       const PetscInt *is_array_idxs;
7106:       ISGetLocalSize(isarray[j],&plen);
7107:       send_buffer_idxs_is[psum] = plen;
7108:       ISGetIndices(isarray[j],&is_array_idxs);
7109:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7110:       ISRestoreIndices(isarray[j],&is_array_idxs);
7111:       psum += plen+1; /* indices + lenght */
7112:     }
7113:     for (i=0;i<n_sends;i++) {
7114:       ilengths_idxs_is[is_indices[i]] = psum;
7115:     }
7116:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7117:   }
7118:   MatISRestoreLocalMat(mat,&local_mat);

7120:   buf_size_idxs = 0;
7121:   buf_size_vals = 0;
7122:   buf_size_idxs_is = 0;
7123:   buf_size_vecs = 0;
7124:   for (i=0;i<n_recvs;i++) {
7125:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7126:     buf_size_vals += (PetscInt)olengths_vals[i];
7127:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7128:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7129:   }
7130:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7131:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7132:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7133:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7135:   /* get new tags for clean communications */
7136:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7137:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7138:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7139:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7141:   /* allocate for requests */
7142:   PetscMalloc1(n_sends,&send_req_idxs);
7143:   PetscMalloc1(n_sends,&send_req_vals);
7144:   PetscMalloc1(n_sends,&send_req_idxs_is);
7145:   PetscMalloc1(n_sends,&send_req_vecs);
7146:   PetscMalloc1(n_recvs,&recv_req_idxs);
7147:   PetscMalloc1(n_recvs,&recv_req_vals);
7148:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7149:   PetscMalloc1(n_recvs,&recv_req_vecs);

7151:   /* communications */
7152:   ptr_idxs = recv_buffer_idxs;
7153:   ptr_vals = recv_buffer_vals;
7154:   ptr_idxs_is = recv_buffer_idxs_is;
7155:   ptr_vecs = recv_buffer_vecs;
7156:   for (i=0;i<n_recvs;i++) {
7157:     source_dest = onodes[i];
7158:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7159:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7160:     ptr_idxs += olengths_idxs[i];
7161:     ptr_vals += olengths_vals[i];
7162:     if (nis) {
7163:       source_dest = onodes_is[i];
7164:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7165:       ptr_idxs_is += olengths_idxs_is[i];
7166:     }
7167:     if (nvecs) {
7168:       source_dest = onodes[i];
7169:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7170:       ptr_vecs += olengths_idxs[i]-2;
7171:     }
7172:   }
7173:   for (i=0;i<n_sends;i++) {
7174:     PetscMPIIntCast(is_indices[i],&source_dest);
7175:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7176:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7177:     if (nis) {
7178:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7179:     }
7180:     if (nvecs) {
7181:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7182:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7183:     }
7184:   }
7185:   ISRestoreIndices(is_sends_internal,&is_indices);
7186:   ISDestroy(&is_sends_internal);

7188:   /* assemble new l2g map */
7189:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7190:   ptr_idxs = recv_buffer_idxs;
7191:   new_local_rows = 0;
7192:   for (i=0;i<n_recvs;i++) {
7193:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7194:     ptr_idxs += olengths_idxs[i];
7195:   }
7196:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7197:   ptr_idxs = recv_buffer_idxs;
7198:   new_local_rows = 0;
7199:   for (i=0;i<n_recvs;i++) {
7200:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7201:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7202:     ptr_idxs += olengths_idxs[i];
7203:   }
7204:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7205:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7206:   PetscFree(l2gmap_indices);

7208:   /* infer new local matrix type from received local matrices type */
7209:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7210:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7211:   if (n_recvs) {
7212:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7213:     ptr_idxs = recv_buffer_idxs;
7214:     for (i=0;i<n_recvs;i++) {
7215:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7216:         new_local_type_private = MATAIJ_PRIVATE;
7217:         break;
7218:       }
7219:       ptr_idxs += olengths_idxs[i];
7220:     }
7221:     switch (new_local_type_private) {
7222:       case MATDENSE_PRIVATE:
7223:         new_local_type = MATSEQAIJ;
7224:         bs = 1;
7225:         break;
7226:       case MATAIJ_PRIVATE:
7227:         new_local_type = MATSEQAIJ;
7228:         bs = 1;
7229:         break;
7230:       case MATBAIJ_PRIVATE:
7231:         new_local_type = MATSEQBAIJ;
7232:         break;
7233:       case MATSBAIJ_PRIVATE:
7234:         new_local_type = MATSEQSBAIJ;
7235:         break;
7236:       default:
7237:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7238:         break;
7239:     }
7240:   } else { /* by default, new_local_type is seqaij */
7241:     new_local_type = MATSEQAIJ;
7242:     bs = 1;
7243:   }

7245:   /* create MATIS object if needed */
7246:   if (!reuse) {
7247:     MatGetSize(mat,&rows,&cols);
7248:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7249:   } else {
7250:     /* it also destroys the local matrices */
7251:     if (*mat_n) {
7252:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7253:     } else { /* this is a fake object */
7254:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7255:     }
7256:   }
7257:   MatISGetLocalMat(*mat_n,&local_mat);
7258:   MatSetType(local_mat,new_local_type);

7260:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7262:   /* Global to local map of received indices */
7263:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7264:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7265:   ISLocalToGlobalMappingDestroy(&l2gmap);

7267:   /* restore attributes -> type of incoming data and its size */
7268:   buf_size_idxs = 0;
7269:   for (i=0;i<n_recvs;i++) {
7270:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7271:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7272:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7273:   }
7274:   PetscFree(recv_buffer_idxs);

7276:   /* set preallocation */
7277:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7278:   if (!newisdense) {
7279:     PetscInt *new_local_nnz=0;

7281:     ptr_idxs = recv_buffer_idxs_local;
7282:     if (n_recvs) {
7283:       PetscCalloc1(new_local_rows,&new_local_nnz);
7284:     }
7285:     for (i=0;i<n_recvs;i++) {
7286:       PetscInt j;
7287:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7288:         for (j=0;j<*(ptr_idxs+1);j++) {
7289:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7290:         }
7291:       } else {
7292:         /* TODO */
7293:       }
7294:       ptr_idxs += olengths_idxs[i];
7295:     }
7296:     if (new_local_nnz) {
7297:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7298:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7299:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7300:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7301:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7302:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7303:     } else {
7304:       MatSetUp(local_mat);
7305:     }
7306:     PetscFree(new_local_nnz);
7307:   } else {
7308:     MatSetUp(local_mat);
7309:   }

7311:   /* set values */
7312:   ptr_vals = recv_buffer_vals;
7313:   ptr_idxs = recv_buffer_idxs_local;
7314:   for (i=0;i<n_recvs;i++) {
7315:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7316:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7317:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7318:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7319:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7320:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7321:     } else {
7322:       /* TODO */
7323:     }
7324:     ptr_idxs += olengths_idxs[i];
7325:     ptr_vals += olengths_vals[i];
7326:   }
7327:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7328:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7329:   MatISRestoreLocalMat(*mat_n,&local_mat);
7330:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7331:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7332:   PetscFree(recv_buffer_vals);

7334: #if 0
7335:   if (!restrict_comm) { /* check */
7336:     Vec       lvec,rvec;
7337:     PetscReal infty_error;

7339:     MatCreateVecs(mat,&rvec,&lvec);
7340:     VecSetRandom(rvec,NULL);
7341:     MatMult(mat,rvec,lvec);
7342:     VecScale(lvec,-1.0);
7343:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7344:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7345:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7346:     VecDestroy(&rvec);
7347:     VecDestroy(&lvec);
7348:   }
7349: #endif

7351:   /* assemble new additional is (if any) */
7352:   if (nis) {
7353:     PetscInt **temp_idxs,*count_is,j,psum;

7355:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7356:     PetscCalloc1(nis,&count_is);
7357:     ptr_idxs = recv_buffer_idxs_is;
7358:     psum = 0;
7359:     for (i=0;i<n_recvs;i++) {
7360:       for (j=0;j<nis;j++) {
7361:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7362:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7363:         psum += plen;
7364:         ptr_idxs += plen+1; /* shift pointer to received data */
7365:       }
7366:     }
7367:     PetscMalloc1(nis,&temp_idxs);
7368:     PetscMalloc1(psum,&temp_idxs[0]);
7369:     for (i=1;i<nis;i++) {
7370:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7371:     }
7372:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7373:     ptr_idxs = recv_buffer_idxs_is;
7374:     for (i=0;i<n_recvs;i++) {
7375:       for (j=0;j<nis;j++) {
7376:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7377:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7378:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7379:         ptr_idxs += plen+1; /* shift pointer to received data */
7380:       }
7381:     }
7382:     for (i=0;i<nis;i++) {
7383:       ISDestroy(&isarray[i]);
7384:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7385:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7386:     }
7387:     PetscFree(count_is);
7388:     PetscFree(temp_idxs[0]);
7389:     PetscFree(temp_idxs);
7390:   }
7391:   /* free workspace */
7392:   PetscFree(recv_buffer_idxs_is);
7393:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7394:   PetscFree(send_buffer_idxs);
7395:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7396:   if (isdense) {
7397:     MatISGetLocalMat(mat,&local_mat);
7398:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7399:     MatISRestoreLocalMat(mat,&local_mat);
7400:   } else {
7401:     /* PetscFree(send_buffer_vals); */
7402:   }
7403:   if (nis) {
7404:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7405:     PetscFree(send_buffer_idxs_is);
7406:   }

7408:   if (nvecs) {
7409:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7410:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7411:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7412:     VecDestroy(&nnsp_vec[0]);
7413:     VecCreate(comm_n,&nnsp_vec[0]);
7414:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7415:     VecSetType(nnsp_vec[0],VECSTANDARD);
7416:     /* set values */
7417:     ptr_vals = recv_buffer_vecs;
7418:     ptr_idxs = recv_buffer_idxs_local;
7419:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7420:     for (i=0;i<n_recvs;i++) {
7421:       PetscInt j;
7422:       for (j=0;j<*(ptr_idxs+1);j++) {
7423:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7424:       }
7425:       ptr_idxs += olengths_idxs[i];
7426:       ptr_vals += olengths_idxs[i]-2;
7427:     }
7428:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7429:     VecAssemblyBegin(nnsp_vec[0]);
7430:     VecAssemblyEnd(nnsp_vec[0]);
7431:   }

7433:   PetscFree(recv_buffer_vecs);
7434:   PetscFree(recv_buffer_idxs_local);
7435:   PetscFree(recv_req_idxs);
7436:   PetscFree(recv_req_vals);
7437:   PetscFree(recv_req_vecs);
7438:   PetscFree(recv_req_idxs_is);
7439:   PetscFree(send_req_idxs);
7440:   PetscFree(send_req_vals);
7441:   PetscFree(send_req_vecs);
7442:   PetscFree(send_req_idxs_is);
7443:   PetscFree(ilengths_vals);
7444:   PetscFree(ilengths_idxs);
7445:   PetscFree(olengths_vals);
7446:   PetscFree(olengths_idxs);
7447:   PetscFree(onodes);
7448:   if (nis) {
7449:     PetscFree(ilengths_idxs_is);
7450:     PetscFree(olengths_idxs_is);
7451:     PetscFree(onodes_is);
7452:   }
7453:   PetscSubcommDestroy(&subcomm);
7454:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
7455:     MatDestroy(mat_n);
7456:     for (i=0;i<nis;i++) {
7457:       ISDestroy(&isarray[i]);
7458:     }
7459:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7460:       VecDestroy(&nnsp_vec[0]);
7461:     }
7462:     *mat_n = NULL;
7463:   }
7464:   return(0);
7465: }

7467: /* temporary hack into ksp private data structure */
7468:  #include <petsc/private/kspimpl.h>

7470: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7471: {
7472:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7473:   PC_IS                  *pcis = (PC_IS*)pc->data;
7474:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7475:   Mat                    coarsedivudotp = NULL;
7476:   Mat                    coarseG,t_coarse_mat_is;
7477:   MatNullSpace           CoarseNullSpace = NULL;
7478:   ISLocalToGlobalMapping coarse_islg;
7479:   IS                     coarse_is,*isarray;
7480:   PetscInt               i,im_active=-1,active_procs=-1;
7481:   PetscInt               nis,nisdofs,nisneu,nisvert;
7482:   PC                     pc_temp;
7483:   PCType                 coarse_pc_type;
7484:   KSPType                coarse_ksp_type;
7485:   PetscBool              multilevel_requested,multilevel_allowed;
7486:   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
7487:   PetscInt               ncoarse,nedcfield;
7488:   PetscBool              compute_vecs = PETSC_FALSE;
7489:   PetscScalar            *array;
7490:   MatReuse               coarse_mat_reuse;
7491:   PetscBool              restr, full_restr, have_void;
7492:   PetscMPIInt            commsize;
7493:   PetscErrorCode         ierr;

7496:   /* Assign global numbering to coarse dofs */
7497:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7498:     PetscInt ocoarse_size;
7499:     compute_vecs = PETSC_TRUE;

7501:     pcbddc->new_primal_space = PETSC_TRUE;
7502:     ocoarse_size = pcbddc->coarse_size;
7503:     PetscFree(pcbddc->global_primal_indices);
7504:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7505:     /* see if we can avoid some work */
7506:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7507:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7508:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7509:         KSPReset(pcbddc->coarse_ksp);
7510:         coarse_reuse = PETSC_FALSE;
7511:       } else { /* we can safely reuse already computed coarse matrix */
7512:         coarse_reuse = PETSC_TRUE;
7513:       }
7514:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7515:       coarse_reuse = PETSC_FALSE;
7516:     }
7517:     /* reset any subassembling information */
7518:     if (!coarse_reuse || pcbddc->recompute_topography) {
7519:       ISDestroy(&pcbddc->coarse_subassembling);
7520:     }
7521:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7522:     coarse_reuse = PETSC_TRUE;
7523:   }
7524:   /* assemble coarse matrix */
7525:   if (coarse_reuse && pcbddc->coarse_ksp) {
7526:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7527:     PetscObjectReference((PetscObject)coarse_mat);
7528:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7529:   } else {
7530:     coarse_mat = NULL;
7531:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7532:   }

7534:   /* creates temporary l2gmap and IS for coarse indexes */
7535:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7536:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

7538:   /* creates temporary MATIS object for coarse matrix */
7539:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7540:   MatDenseGetArray(coarse_submat_dense,&array);
7541:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7542:   MatDenseRestoreArray(coarse_submat_dense,&array);
7543:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7544:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7545:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7546:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7547:   MatDestroy(&coarse_submat_dense);

7549:   /* count "active" (i.e. with positive local size) and "void" processes */
7550:   im_active = !!(pcis->n);
7551:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

7553:   /* determine number of processes participating to coarse solver and compute subassembling pattern */
7554:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7555:   /* full_restr : just use the receivers from the subassembling pattern */
7556:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7557:   coarse_mat_is = NULL;
7558:   multilevel_allowed = PETSC_FALSE;
7559:   multilevel_requested = PETSC_FALSE;
7560:   pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7561:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7562:   if (multilevel_requested) {
7563:     ncoarse = active_procs/pcbddc->coarsening_ratio;
7564:     restr = PETSC_FALSE;
7565:     full_restr = PETSC_FALSE;
7566:   } else {
7567:     ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7568:     restr = PETSC_TRUE;
7569:     full_restr = PETSC_TRUE;
7570:   }
7571:   if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7572:   ncoarse = PetscMax(1,ncoarse);
7573:   if (!pcbddc->coarse_subassembling) {
7574:     if (pcbddc->coarsening_ratio > 1) {
7575:       if (multilevel_requested) {
7576:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7577:       } else {
7578:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7579:       }
7580:     } else {
7581:       PetscMPIInt rank;
7582:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7583:       have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7584:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7585:     }
7586:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7587:     PetscInt    psum;
7588:     if (pcbddc->coarse_ksp) psum = 1;
7589:     else psum = 0;
7590:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7591:     if (ncoarse < commsize) have_void = PETSC_TRUE;
7592:   }
7593:   /* determine if we can go multilevel */
7594:   if (multilevel_requested) {
7595:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7596:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7597:   }
7598:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7600:   /* dump subassembling pattern */
7601:   if (pcbddc->dbg_flag && multilevel_allowed) {
7602:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7603:   }

7605:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7606:   nedcfield = -1;
7607:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7608:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
7609:     const PetscInt         *idxs;
7610:     ISLocalToGlobalMapping tmap;

7612:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7613:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7614:     /* allocate space for temporary storage */
7615:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7616:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7617:     /* allocate for IS array */
7618:     nisdofs = pcbddc->n_ISForDofsLocal;
7619:     if (pcbddc->nedclocal) {
7620:       if (pcbddc->nedfield > -1) {
7621:         nedcfield = pcbddc->nedfield;
7622:       } else {
7623:         nedcfield = 0;
7624:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7625:         nisdofs = 1;
7626:       }
7627:     }
7628:     nisneu = !!pcbddc->NeumannBoundariesLocal;
7629:     nisvert = 0; /* nisvert is not used */
7630:     nis = nisdofs + nisneu + nisvert;
7631:     PetscMalloc1(nis,&isarray);
7632:     /* dofs splitting */
7633:     for (i=0;i<nisdofs;i++) {
7634:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7635:       if (nedcfield != i) {
7636:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7637:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7638:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7639:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
7640:       } else {
7641:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
7642:         ISGetIndices(pcbddc->nedclocal,&idxs);
7643:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7644:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
7645:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
7646:       }
7647:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7648:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
7649:       /* ISView(isarray[i],0); */
7650:     }
7651:     /* neumann boundaries */
7652:     if (pcbddc->NeumannBoundariesLocal) {
7653:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7654:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
7655:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7656:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7657:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7658:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7659:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
7660:       /* ISView(isarray[nisdofs],0); */
7661:     }
7662:     /* free memory */
7663:     PetscFree(tidxs);
7664:     PetscFree(tidxs2);
7665:     ISLocalToGlobalMappingDestroy(&tmap);
7666:   } else {
7667:     nis = 0;
7668:     nisdofs = 0;
7669:     nisneu = 0;
7670:     nisvert = 0;
7671:     isarray = NULL;
7672:   }
7673:   /* destroy no longer needed map */
7674:   ISLocalToGlobalMappingDestroy(&coarse_islg);

7676:   /* subassemble */
7677:   if (multilevel_allowed) {
7678:     Vec       vp[1];
7679:     PetscInt  nvecs = 0;
7680:     PetscBool reuse,reuser;

7682:     if (coarse_mat) reuse = PETSC_TRUE;
7683:     else reuse = PETSC_FALSE;
7684:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7685:     vp[0] = NULL;
7686:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7687:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
7688:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
7689:       VecSetType(vp[0],VECSTANDARD);
7690:       nvecs = 1;

7692:       if (pcbddc->divudotp) {
7693:         Mat      B,loc_divudotp;
7694:         Vec      v,p;
7695:         IS       dummy;
7696:         PetscInt np;

7698:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
7699:         MatGetSize(loc_divudotp,&np,NULL);
7700:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
7701:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
7702:         MatCreateVecs(B,&v,&p);
7703:         VecSet(p,1.);
7704:         MatMultTranspose(B,p,v);
7705:         VecDestroy(&p);
7706:         MatDestroy(&B);
7707:         VecGetArray(vp[0],&array);
7708:         VecPlaceArray(pcbddc->vec1_P,array);
7709:         VecRestoreArray(vp[0],&array);
7710:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
7711:         VecResetArray(pcbddc->vec1_P);
7712:         ISDestroy(&dummy);
7713:         VecDestroy(&v);
7714:       }
7715:     }
7716:     if (reuser) {
7717:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
7718:     } else {
7719:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
7720:     }
7721:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
7722:       PetscScalar *arraym,*arrayv;
7723:       PetscInt    nl;
7724:       VecGetLocalSize(vp[0],&nl);
7725:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
7726:       MatDenseGetArray(coarsedivudotp,&arraym);
7727:       VecGetArray(vp[0],&arrayv);
7728:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
7729:       VecRestoreArray(vp[0],&arrayv);
7730:       MatDenseRestoreArray(coarsedivudotp,&arraym);
7731:       VecDestroy(&vp[0]);
7732:     } else {
7733:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
7734:     }
7735:   } else {
7736:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
7737:   }
7738:   if (coarse_mat_is || coarse_mat) {
7739:     PetscMPIInt size;
7740:     MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
7741:     if (!multilevel_allowed) {
7742:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
7743:     } else {
7744:       Mat A;

7746:       /* if this matrix is present, it means we are not reusing the coarse matrix */
7747:       if (coarse_mat_is) {
7748:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
7749:         PetscObjectReference((PetscObject)coarse_mat_is);
7750:         coarse_mat = coarse_mat_is;
7751:       }
7752:       /* be sure we don't have MatSeqDENSE as local mat */
7753:       MatISGetLocalMat(coarse_mat,&A);
7754:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
7755:     }
7756:   }
7757:   MatDestroy(&t_coarse_mat_is);
7758:   MatDestroy(&coarse_mat_is);

7760:   /* create local to global scatters for coarse problem */
7761:   if (compute_vecs) {
7762:     PetscInt lrows;
7763:     VecDestroy(&pcbddc->coarse_vec);
7764:     if (coarse_mat) {
7765:       MatGetLocalSize(coarse_mat,&lrows,NULL);
7766:     } else {
7767:       lrows = 0;
7768:     }
7769:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
7770:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
7771:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
7772:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
7773:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
7774:   }
7775:   ISDestroy(&coarse_is);

7777:   /* set defaults for coarse KSP and PC */
7778:   if (multilevel_allowed) {
7779:     coarse_ksp_type = KSPRICHARDSON;
7780:     coarse_pc_type = PCBDDC;
7781:   } else {
7782:     coarse_ksp_type = KSPPREONLY;
7783:     coarse_pc_type = PCREDUNDANT;
7784:   }

7786:   /* print some info if requested */
7787:   if (pcbddc->dbg_flag) {
7788:     if (!multilevel_allowed) {
7789:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
7790:       if (multilevel_requested) {
7791:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
7792:       } else if (pcbddc->max_levels) {
7793:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
7794:       }
7795:       PetscViewerFlush(pcbddc->dbg_viewer);
7796:     }
7797:   }

7799:   /* communicate coarse discrete gradient */
7800:   coarseG = NULL;
7801:   if (pcbddc->nedcG && multilevel_allowed) {
7802:     MPI_Comm ccomm;
7803:     if (coarse_mat) {
7804:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
7805:     } else {
7806:       ccomm = MPI_COMM_NULL;
7807:     }
7808:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
7809:   }

7811:   /* create the coarse KSP object only once with defaults */
7812:   if (coarse_mat) {
7813:     PetscViewer dbg_viewer = NULL;
7814:     if (pcbddc->dbg_flag) {
7815:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
7816:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
7817:     }
7818:     if (!pcbddc->coarse_ksp) {
7819:       char prefix[256],str_level[16];
7820:       size_t len;

7822:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
7823:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
7824:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
7825:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
7826:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7827:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
7828:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
7829:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7830:       /* TODO is this logic correct? should check for coarse_mat type */
7831:       PCSetType(pc_temp,coarse_pc_type);
7832:       /* prefix */
7833:       PetscStrcpy(prefix,"");
7834:       PetscStrcpy(str_level,"");
7835:       if (!pcbddc->current_level) {
7836:         PetscStrcpy(prefix,((PetscObject)pc)->prefix);
7837:         PetscStrcat(prefix,"pc_bddc_coarse_");
7838:       } else {
7839:         PetscStrlen(((PetscObject)pc)->prefix,&len);
7840:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
7841:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
7842:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
7843:         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
7844:         PetscStrcat(prefix,str_level);
7845:       }
7846:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
7847:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7848:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
7849:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
7850:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
7851:       /* allow user customization */
7852:       KSPSetFromOptions(pcbddc->coarse_ksp);
7853:     }
7854:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
7855:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
7856:     if (nisdofs) {
7857:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
7858:       for (i=0;i<nisdofs;i++) {
7859:         ISDestroy(&isarray[i]);
7860:       }
7861:     }
7862:     if (nisneu) {
7863:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
7864:       ISDestroy(&isarray[nisdofs]);
7865:     }
7866:     if (nisvert) {
7867:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
7868:       ISDestroy(&isarray[nis-1]);
7869:     }
7870:     if (coarseG) {
7871:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
7872:     }

7874:     /* get some info after set from options */
7875:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
7876:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
7877:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
7878:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
7879:     if (isbddc && !multilevel_allowed) {
7880:       PCSetType(pc_temp,coarse_pc_type);
7881:       isbddc = PETSC_FALSE;
7882:     }
7883:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
7884:     if (multilevel_requested && !isbddc && !isnn) {
7885:       PCSetType(pc_temp,PCBDDC);
7886:       isbddc = PETSC_TRUE;
7887:       isnn   = PETSC_FALSE;
7888:     }
7889:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
7890:     if (isredundant) {
7891:       KSP inner_ksp;
7892:       PC  inner_pc;

7894:       PCRedundantGetKSP(pc_temp,&inner_ksp);
7895:       KSPGetPC(inner_ksp,&inner_pc);
7896:       PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
7897:     }

7899:     /* parameters which miss an API */
7900:     if (isbddc) {
7901:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
7902:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
7903:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
7904:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
7905:       if (pcbddc_coarse->benign_saddle_point) {
7906:         Mat                    coarsedivudotp_is;
7907:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
7908:         IS                     row,col;
7909:         const PetscInt         *gidxs;
7910:         PetscInt               n,st,M,N;

7912:         MatGetSize(coarsedivudotp,&n,NULL);
7913:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
7914:         st   = st-n;
7915:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
7916:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
7917:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
7918:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
7919:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
7920:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
7921:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
7922:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
7923:         ISGetSize(row,&M);
7924:         MatGetSize(coarse_mat,&N,NULL);
7925:         ISDestroy(&row);
7926:         ISDestroy(&col);
7927:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
7928:         MatSetType(coarsedivudotp_is,MATIS);
7929:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
7930:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
7931:         ISLocalToGlobalMappingDestroy(&rl2g);
7932:         ISLocalToGlobalMappingDestroy(&cl2g);
7933:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
7934:         MatDestroy(&coarsedivudotp);
7935:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
7936:         MatDestroy(&coarsedivudotp_is);
7937:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
7938:         if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
7939:       }
7940:     }

7942:     /* propagate symmetry info of coarse matrix */
7943:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
7944:     if (pc->pmat->symmetric_set) {
7945:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
7946:     }
7947:     if (pc->pmat->hermitian_set) {
7948:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
7949:     }
7950:     if (pc->pmat->spd_set) {
7951:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
7952:     }
7953:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
7954:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
7955:     }
7956:     /* set operators */
7957:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
7958:     if (pcbddc->dbg_flag) {
7959:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
7960:     }
7961:   }
7962:   MatDestroy(&coarseG);
7963:   PetscFree(isarray);
7964: #if 0
7965:   {
7966:     PetscViewer viewer;
7967:     char filename[256];
7968:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
7969:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
7970:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
7971:     MatView(coarse_mat,viewer);
7972:     PetscViewerPopFormat(viewer);
7973:     PetscViewerDestroy(&viewer);
7974:   }
7975: #endif

7977:   if (pcbddc->coarse_ksp) {
7978:     Vec crhs,csol;

7980:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
7981:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
7982:     if (!csol) {
7983:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
7984:     }
7985:     if (!crhs) {
7986:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
7987:     }
7988:   }
7989:   MatDestroy(&coarsedivudotp);

7991:   /* compute null space for coarse solver if the benign trick has been requested */
7992:   if (pcbddc->benign_null) {

7994:     VecSet(pcbddc->vec1_P,0.);
7995:     for (i=0;i<pcbddc->benign_n;i++) {
7996:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
7997:     }
7998:     VecAssemblyBegin(pcbddc->vec1_P);
7999:     VecAssemblyEnd(pcbddc->vec1_P);
8000:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8001:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8002:     if (coarse_mat) {
8003:       Vec         nullv;
8004:       PetscScalar *array,*array2;
8005:       PetscInt    nl;

8007:       MatCreateVecs(coarse_mat,&nullv,NULL);
8008:       VecGetLocalSize(nullv,&nl);
8009:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8010:       VecGetArray(nullv,&array2);
8011:       PetscMemcpy(array2,array,nl*sizeof(*array));
8012:       VecRestoreArray(nullv,&array2);
8013:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8014:       VecNormalize(nullv,NULL);
8015:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8016:       VecDestroy(&nullv);
8017:     }
8018:   }

8020:   if (pcbddc->coarse_ksp) {
8021:     PetscBool ispreonly;

8023:     if (CoarseNullSpace) {
8024:       PetscBool isnull;
8025:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8026:       if (isnull) {
8027:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8028:       }
8029:       /* TODO: add local nullspaces (if any) */
8030:     }
8031:     /* setup coarse ksp */
8032:     KSPSetUp(pcbddc->coarse_ksp);
8033:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8034:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8035:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8036:       KSP       check_ksp;
8037:       KSPType   check_ksp_type;
8038:       PC        check_pc;
8039:       Vec       check_vec,coarse_vec;
8040:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8041:       PetscInt  its;
8042:       PetscBool compute_eigs;
8043:       PetscReal *eigs_r,*eigs_c;
8044:       PetscInt  neigs;
8045:       const char *prefix;

8047:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8048:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8049:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8050:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8051:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8052:       /* prevent from setup unneeded object */
8053:       KSPGetPC(check_ksp,&check_pc);
8054:       PCSetType(check_pc,PCNONE);
8055:       if (ispreonly) {
8056:         check_ksp_type = KSPPREONLY;
8057:         compute_eigs = PETSC_FALSE;
8058:       } else {
8059:         check_ksp_type = KSPGMRES;
8060:         compute_eigs = PETSC_TRUE;
8061:       }
8062:       KSPSetType(check_ksp,check_ksp_type);
8063:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8064:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8065:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8066:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8067:       KSPSetOptionsPrefix(check_ksp,prefix);
8068:       KSPAppendOptionsPrefix(check_ksp,"check_");
8069:       KSPSetFromOptions(check_ksp);
8070:       KSPSetUp(check_ksp);
8071:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8072:       KSPSetPC(check_ksp,check_pc);
8073:       /* create random vec */
8074:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8075:       VecSetRandom(check_vec,NULL);
8076:       MatMult(coarse_mat,check_vec,coarse_vec);
8077:       /* solve coarse problem */
8078:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8079:       /* set eigenvalue estimation if preonly has not been requested */
8080:       if (compute_eigs) {
8081:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8082:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8083:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8084:         if (neigs) {
8085:           lambda_max = eigs_r[neigs-1];
8086:           lambda_min = eigs_r[0];
8087:           if (pcbddc->use_coarse_estimates) {
8088:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8089:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8090:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8091:             }
8092:           }
8093:         }
8094:       }

8096:       /* check coarse problem residual error */
8097:       if (pcbddc->dbg_flag) {
8098:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8099:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8100:         VecAXPY(check_vec,-1.0,coarse_vec);
8101:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8102:         MatMult(coarse_mat,check_vec,coarse_vec);
8103:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8104:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8105:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8106:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8107:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8108:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8109:         if (CoarseNullSpace) {
8110:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8111:         }
8112:         if (compute_eigs) {
8113:           PetscReal          lambda_max_s,lambda_min_s;
8114:           KSPConvergedReason reason;
8115:           KSPGetType(check_ksp,&check_ksp_type);
8116:           KSPGetIterationNumber(check_ksp,&its);
8117:           KSPGetConvergedReason(check_ksp,&reason);
8118:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8119:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8120:           for (i=0;i<neigs;i++) {
8121:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8122:           }
8123:         }
8124:         PetscViewerFlush(dbg_viewer);
8125:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8126:       }
8127:       VecDestroy(&check_vec);
8128:       VecDestroy(&coarse_vec);
8129:       KSPDestroy(&check_ksp);
8130:       if (compute_eigs) {
8131:         PetscFree(eigs_r);
8132:         PetscFree(eigs_c);
8133:       }
8134:     }
8135:   }
8136:   MatNullSpaceDestroy(&CoarseNullSpace);
8137:   /* print additional info */
8138:   if (pcbddc->dbg_flag) {
8139:     /* waits until all processes reaches this point */
8140:     PetscBarrier((PetscObject)pc);
8141:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8142:     PetscViewerFlush(pcbddc->dbg_viewer);
8143:   }

8145:   /* free memory */
8146:   MatDestroy(&coarse_mat);
8147:   return(0);
8148: }

/*
   PCBDDCComputePrimalNumbering - Computes a global numbering for the local primal (coarse) dofs.

   Input:
     pc - the preconditioner; its data is read both as PC_BDDC and as PC_IS, and pc->pmat->data as Mat_IS

   Output:
     coarse_size_n          - the size reported by ISRenumber for the renumbered set
     local_primal_indices_n - array obtained with PetscMalloc1 holding, for each local primal dof,
                              its index in the new numbering; it is not freed here, so the caller
                              is responsible for releasing it

   Errors:
     PETSC_ERR_PLIB if local primal dofs exist but local_primal_ref_node is not set, if the number
     of renumbered indices differs from local_primal_size, or (debug mode only) if the consistency
     check on the primal dofs fails.

   When pcbddc->dbg_flag is set, a consistency check is run and its report is written to
   pcbddc->dbg_viewer.
*/
8150: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8151: {
8152:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8153:   PC_IS*         pcis = (PC_IS*)pc->data;
8154:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8155:   IS             subset,subset_mult,subset_n;
8156:   PetscInt       local_size,coarse_size=0;
8157:   PetscInt       *local_primal_indices=NULL;
8158:   const PetscInt *t_local_primal_indices;

8162:   /* Compute global number of coarse dofs */
8163:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
        /* map the local reference nodes to global indices, then renumber them using the multiplicities */
8164:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8165:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8166:   ISDestroy(&subset_n);
8167:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8168:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8169:   ISDestroy(&subset);
8170:   ISDestroy(&subset_mult);
8171:   ISGetLocalSize(subset_n,&local_size);
8172:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
        /* copy the renumbered indices into an array that outlives subset_n */
8173:   PetscMalloc1(local_size,&local_primal_indices);
8174:   ISGetIndices(subset_n,&t_local_primal_indices);
8175:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8176:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8177:   ISDestroy(&subset_n);

8179:   /* check numbering */
8180:   if (pcbddc->dbg_flag) {
8181:     PetscScalar coarsesum,*array,*array2;
8182:     PetscInt    i;
8183:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8185:     PetscViewerFlush(pcbddc->dbg_viewer);
8186:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8187:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8188:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8189:     /* counter */
          /* vec2_N receives, for every local dof, the sum of ones contributed through matis->rctx */
8190:     VecSet(pcis->vec1_global,0.0);
8191:     VecSet(pcis->vec1_N,1.0);
8192:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8193:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8194:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8195:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
          /* same accumulation, but only the local primal dofs contribute a one */
8196:     VecSet(pcis->vec1_N,0.0);
8197:     for (i=0;i<pcbddc->local_primal_size;i++) {
8198:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8199:     }
8200:     VecAssemblyBegin(pcis->vec1_N);
8201:     VecAssemblyEnd(pcis->vec1_N);
8202:     VecSet(pcis->vec1_global,0.0);
8203:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8204:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8205:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8206:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8207:     VecGetArray(pcis->vec1_N,&array);
8208:     VecGetArray(pcis->vec2_N,&array2);
          /* a dof marked primal somewhere must have a primal count equal to its total count */
8209:     for (i=0;i<pcis->n;i++) {
8210:       if (array[i] != 0.0 && array[i] != array2[i]) {
8211:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8212:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8213:         set_error = PETSC_TRUE;
8214:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
              /* PetscInt arguments use %D (as in the SETERRQ2 above); PetscGlobalRank is printed with %d */
8215:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8216:       }
8217:     }
8218:     VecRestoreArray(pcis->vec2_N,&array2);
8219:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8220:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* invert the positive counts so that the global sum of the vector can be printed next to coarse_size */
8221:     for (i=0;i<pcis->n;i++) {
8222:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8223:     }
8224:     VecRestoreArray(pcis->vec1_N,&array);
8225:     VecSet(pcis->vec1_global,0.0);
8226:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8227:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8228:     VecSum(pcis->vec1_global,&coarsesum);
8229:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum));
8230:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8231:       PetscInt *gidxs;

8233:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8234:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8235:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8236:       PetscViewerFlush(pcbddc->dbg_viewer);
8237:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8238:       for (i=0;i<pcbddc->local_primal_size;i++) {
8239:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8240:       }
8241:       PetscViewerFlush(pcbddc->dbg_viewer);
8242:       PetscFree(gidxs);
8243:     }
8244:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* balance the PetscViewerASCIIPushSynchronized issued at the start of the check */
8245:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8246:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8247:   }
8248:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8249:   /* get back data */
8250:   *coarse_size_n = coarse_size;
8251:   *local_primal_indices_n = local_primal_indices;
8252:   return(0);
8253: }

/*
   PCBDDCGlobalToLocal - Translates an index set expressed in the ordering of gwork into an
   index set expressed in the ordering of lwork, using a scatter between the two vectors.

   Input:
     g2l_ctx  - scatter applied in forward mode from gwork to lwork
     gwork    - work vector; it is zeroed and then 1.0 is inserted at the indices of globalis
     lwork    - work vector; it is zeroed and then overwritten by the scatter
     globalis - indices into gwork

   Output:
     localis  - new IS listing every position i of lwork whose scattered value has real part > 0.5

   The previous contents of both work vectors are lost.
*/
8255: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8256: {
8257:   IS             localis_t;
8258:   PetscInt       i,lsize,*idxs,n;
8259:   PetscScalar    *vals;

8263:   /* get indices in local ordering exploiting local to global map */
8264:   ISGetLocalSize(globalis,&lsize);
        /* marker values: one 1.0 per index of globalis */
8265:   PetscMalloc1(lsize,&vals);
8266:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8267:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8268:   VecSet(gwork,0.0);
8269:   VecSet(lwork,0.0);
8270:   if (idxs) { /* multilevel guard */
8271:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8272:   }
        /* the index and value buffers are released between the begin and end of the assembly */
8273:   VecAssemblyBegin(gwork);
8274:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8275:   PetscFree(vals);
8276:   VecAssemblyEnd(gwork);
8277:   /* now compute set in local ordering */
8278:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8279:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
        /* vals is reused here as a read-only view of lwork's data (constness is cast away) */
8280:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8281:   VecGetSize(lwork,&n);
        /* first pass: count the marked entries */
8282:   for (i=0,lsize=0;i<n;i++) {
8283:     if (PetscRealPart(vals[i]) > 0.5) {
8284:       lsize++;
8285:     }
8286:   }
        /* second pass: record the positions of the marked entries */
8287:   PetscMalloc1(lsize,&idxs);
8288:   for (i=0,lsize=0;i<n;i++) {
8289:     if (PetscRealPart(vals[i]) > 0.5) {
8290:       idxs[lsize++] = i;
8291:     }
8292:   }
8293:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
        /* idxs is handed over with PETSC_OWN_POINTER, hence it is not freed in this function */
8294:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8295:   *localis = localis_t;
8296:   return(0);
8297: }

/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp.

   Input:
     pc - the preconditioner; its data is read both as PC_IS and as PC_BDDC

   Steps performed:
     1. select the adjacency (used_xadj/used_adjncy) passed to PCBDDCSubSchursSetUp
     2. create the Schur complement object S_j from the A_II, A_IB, A_BI, A_BB blocks of pcis
     3. call PCBDDCSubSchursSetUp, either in the non-explicit form (with pcbddc->ksp_D attached
        to S_j) or in the explicit form, where a change of basis may first be computed through a
        temporary PC
     4. release the temporaries created here

   Side effects visible below: pcbddc->sub_schurs_layers may be reset to -1, and
   pcbddc->local_mat may be replaced by a MATSEQAIJ conversion.
*/
8299: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8300: {
8301:   PC_IS               *pcis=(PC_IS*)pc->data;
8302:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8303:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8304:   Mat                 S_j;
8305:   PetscInt            *used_xadj,*used_adjncy;
8306:   PetscBool           free_used_adj;
8307:   PetscErrorCode      ierr;

8310:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8311:   free_used_adj = PETSC_FALSE;
8312:   if (pcbddc->sub_schurs_layers == -1) {
          /* no adjacency is passed in this case */
8313:     used_xadj = NULL;
8314:     used_adjncy = NULL;
8315:   } else {
8316:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8317:       used_xadj = pcbddc->mat_graph->xadj;
8318:       used_adjncy = pcbddc->mat_graph->adjncy;
8319:     } else if (pcbddc->computed_rowadj) {
8320:       used_xadj = pcbddc->mat_graph->xadj;
8321:       used_adjncy = pcbddc->mat_graph->adjncy;
8322:     } else {
            /* fall back to the row structure of the local matrix, copied so it survives MatRestoreRowIJ */
8323:       PetscBool      flg_row=PETSC_FALSE;
8324:       const PetscInt *xadj,*adjncy;
8325:       PetscInt       nvtxs;

8327:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8328:       if (flg_row) {
8329:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8330:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8331:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8332:         free_used_adj = PETSC_TRUE;
8333:       } else {
              /* row structure not available: behave as in the sub_schurs_layers == -1 case */
8334:         pcbddc->sub_schurs_layers = -1;
8335:         used_xadj = NULL;
8336:         used_adjncy = NULL;
8337:       }
8338:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8339:     }
8340:   }

8342:   /* setup sub_schurs data */
8343:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8344:   if (!sub_schurs->schur_explicit) {
8345:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8346:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8347:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8348:   } else {
8349:     Mat       change = NULL;
8350:     Vec       scaling = NULL;
8351:     IS        change_primal = NULL, iP;
8352:     PetscInt  benign_n;
8353:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8354:     PetscBool isseqaij,need_change = PETSC_FALSE;
8355:     PetscBool discrete_harmonic = PETSC_FALSE;

          /* when vertices are not used, solvers are reused only if sub_schurs has no vertices */
8357:     if (!pcbddc->use_vertices && reuse_solvers) {
8358:       PetscInt n_vertices;

8360:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8361:       reuse_solvers = (PetscBool)!n_vertices;
8362:     }
          /* make sure pcbddc->local_mat is a MATSEQAIJ matrix */
8363:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8364:     if (!isseqaij) {
8365:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8366:       if (matis->A == pcbddc->local_mat) {
              /* local_mat is the same object as matis->A: convert into a new matrix rather than in place */
8367:         MatDestroy(&pcbddc->local_mat);
8368:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8369:       } else {
8370:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8371:       }
8372:     }
8373:     if (!pcbddc->benign_change_explicit) {
8374:       benign_n = pcbddc->benign_n;
8375:     } else {
8376:       benign_n = 0;
8377:     }
8378:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8379:        We need a global reduction to avoid possible deadlocks.
8380:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8381:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8382:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8383:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
            /* the change is needed only if no process already holds one */
8384:       need_change = (PetscBool)(!need_change);
8385:     }
8386:     /* If the user defines additional constraints, we import them here.
8387:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8388:     if (need_change) {
8389:       PC_IS   *pcisf;
8390:       PC_BDDC *pcbddcf;
8391:       PC      pcf;

8393:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
            /* temporary PCBDDC sharing the operators of pc, used only to run PCBDDCConstraintsSetUp */
8394:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8395:       PCSetOperators(pcf,pc->mat,pc->pmat);
8396:       PCSetType(pcf,PCBDDC);

8398:       /* hacks */
            /* the temporary PC borrows (does not own) the objects of pc assigned below */
8399:       pcisf                        = (PC_IS*)pcf->data;
8400:       pcisf->is_B_local            = pcis->is_B_local;
8401:       pcisf->vec1_N                = pcis->vec1_N;
8402:       pcisf->BtoNmap               = pcis->BtoNmap;
8403:       pcisf->n                     = pcis->n;
8404:       pcisf->n_B                   = pcis->n_B;
8405:       pcbddcf                      = (PC_BDDC*)pcf->data;
8406:       PetscFree(pcbddcf->mat_graph);
8407:       pcbddcf->mat_graph           = pcbddc->mat_graph;
8408:       pcbddcf->use_faces           = PETSC_TRUE;
8409:       pcbddcf->use_change_of_basis = PETSC_TRUE;
8410:       pcbddcf->use_change_on_faces = PETSC_TRUE;
8411:       pcbddcf->use_qr_single       = PETSC_TRUE;
8412:       pcbddcf->fake_change         = PETSC_TRUE;

8414:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8415:       PCBDDCConstraintsSetUp(pcf);
8416:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
8417:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
            /* take over the constraint matrix; the field is cleared so only `change` refers to it */
8418:       change = pcbddcf->ConstraintMatrix;
8419:       pcbddcf->ConstraintMatrix = NULL;

8421:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8422:       PetscFree(pcbddcf->sub_schurs);
8423:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8424:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8425:       PetscFree(pcbddcf->primal_indices_local_idxs);
8426:       PetscFree(pcbddcf->onearnullvecs_state);
8427:       PetscFree(pcf->data);
            /* data has been released by hand above, so the type-specific destroy/reset hooks are disabled */
8428:       pcf->ops->destroy = NULL;
8429:       pcf->ops->reset   = NULL;
8430:       PCDestroy(&pcf);
8431:     }
8432:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

          /* the discrete harmonic option is read only when an object named "__KSPFETIDP_iP" is attached to pc */
8434:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8435:     if (iP) {
8436:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8437:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8438:       PetscOptionsEnd();
8439:     }
8440:     if (discrete_harmonic) {
            /* work on a copy of local_mat with the rows and columns listed in iP zeroed (1.0 on the diagonal) */
8441:       Mat A;
8442:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8443:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8444:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8445:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8446:       MatDestroy(&A);
8447:     } else {
8448:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8449:     }
8450:     MatDestroy(&change);
8451:     ISDestroy(&change_primal);
8452:   }
8453:   MatDestroy(&S_j);

8455:   /* free adjacency */
8456:   if (free_used_adj) {
8457:     PetscFree2(used_xadj,used_adjncy);
8458:   }
8459:   return(0);
8460: }

/*
   PCBDDCInitSubSchurs - Creates pcbddc->sub_schurs if it does not exist yet and initializes it
   with PCBDDCSubSchursInit.

   Input:
     pc - the preconditioner; its data is read both as PC_IS and as PC_BDDC

   The graph passed to PCBDDCSubSchursInit is pcbddc->mat_graph, unless pcbddc->sub_schurs_rebuild
   is set, in which case a temporary graph is built here and destroyed before returning.
   When pcbddc->dbg_flag is set and no rebuild is requested, the graph and the number of local
   candidate vertices, edges and faces are printed to pcbddc->dbg_viewer.
*/
8462: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8463: {
8464:   PC_IS               *pcis=(PC_IS*)pc->data;
8465:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8466:   PCBDDCGraph         graph;
8467:   PetscErrorCode      ierr;

8470:   /* attach interface graph for determining subsets */
8471:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8472:     IS       verticesIS,verticescomm;
8473:     PetscInt vsize,*idxs;

          /* copy the candidate vertices of mat_graph into an IS living on the communicator of pc */
8475:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8476:     ISGetSize(verticesIS,&vsize);
8477:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8478:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8479:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8480:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
          /* temporary graph reusing the mapping, global size and custom minimal size of mat_graph */
8481:     PCBDDCGraphCreate(&graph);
8482:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8483:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8484:     ISDestroy(&verticescomm);
8485:     PCBDDCGraphComputeConnectedComponents(graph);
8486:   } else {
8487:     graph = pcbddc->mat_graph;
8488:   }
8489:   /* print some info */
8490:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8491:     IS       vertices;
8492:     PetscInt nv,nedges,nfaces;
8493:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8494:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8495:     ISGetSize(vertices,&nv);
          /* the value in parentheses on each line is the corresponding use_vertices/use_edges/use_faces flag */
8496:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8497:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
8498:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8499:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8500:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8501:     PetscViewerFlush(pcbddc->dbg_viewer);
8502:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8503:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8504:   }

8506:   /* sub_schurs init */
8507:   if (!pcbddc->sub_schurs) {
8508:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8509:   }
8510:   PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
        /* the prefix pointer is shared with pc, not copied */
8511:   pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix;

8513:   /* free graph struct */
8514:   if (pcbddc->sub_schurs_rebuild) {
8515:     PCBDDCGraphDestroy(&graph);
8516:   }
8517:   return(0);
8518: }

/*
   PCBDDCCheckOperator - Diagnostic routine: builds a random interface vector that is continuous
   at the primal dofs, applies the local Schur complement followed by
   PCBDDCApplyInterfacePreconditioner, and prints the infinity norm of the difference between
   the result and the starting vector.

   Input:
     pc - the preconditioner; its data is read both as PC_IS and as PC_BDDC

   The check is performed only when pcbddc->n_vertices == pcbddc->local_primal_size; otherwise
   the function returns without doing anything. Output is printed with PetscPrintf on
   PETSC_COMM_SELF. Several work vectors of pcis and pcbddc (vec1_N, vec1_B, vec2_B, vec1_D,
   vec1_P, coarse_vec) are overwritten.

   NOTE(review): zerodiag is initialized to NULL and never assigned in this function, so the
   `if (zerodiag)` branches below are not executed as the code stands.
*/
8520: PetscErrorCode PCBDDCCheckOperator(PC pc)
8521: {
8522:   PC_IS               *pcis=(PC_IS*)pc->data;
8523:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8524:   PetscErrorCode      ierr;

8527:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8528:     IS             zerodiag = NULL;
8529:     Mat            S_j,B0_B=NULL;
8530:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
8531:     PetscScalar    *p0_check,*array,*array2;
8532:     PetscReal      norm;
8533:     PetscInt       i;

8535:     /* B0 and B0_B */
8536:     if (zerodiag) {
8537:       IS       dummy;

8539:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8540:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8541:       MatCreateVecs(B0_B,NULL,&dummy_vec);
8542:       ISDestroy(&dummy);
8543:     }
8544:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
          /* vec_scale_P = reciprocal of the values obtained by summing ones through coarse_loc_to_glob */
8545:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8546:     VecSet(pcbddc->vec1_P,1.0);
8547:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8548:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8549:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8550:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8551:     VecReciprocal(vec_scale_P);
8552:     /* S_j */
8553:     MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8554:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

8556:     /* mimic vector in \widetilde{W}_\Gamma */
8557:     VecSetRandom(pcis->vec1_N,NULL);
8558:     /* continuous in primal space */
          /* random coarse values are scattered back so that every subdomain receives its values from the same coarse vector */
8559:     VecSetRandom(pcbddc->coarse_vec,NULL);
8560:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8561:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8562:     VecGetArray(pcbddc->vec1_P,&array);
          /* keep a copy of the last benign_n primal values for the p0 comparison printed at the end */
8563:     PetscCalloc1(pcbddc->benign_n,&p0_check);
8564:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8565:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8566:     VecRestoreArray(pcbddc->vec1_P,&array);
8567:     VecAssemblyBegin(pcis->vec1_N);
8568:     VecAssemblyEnd(pcis->vec1_N);
8569:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8570:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
          /* vec_check_B keeps the starting interface vector for the final comparison */
8571:     VecDuplicate(pcis->vec2_B,&vec_check_B);
8572:     VecCopy(pcis->vec2_B,vec_check_B);

8574:     /* assemble rhs for coarse problem */
8575:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8576:     /* local with Schur */
8577:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8578:     if (zerodiag) {
8579:       VecGetArray(dummy_vec,&array);
8580:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8581:       VecRestoreArray(dummy_vec,&array);
8582:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8583:     }
8584:     /* sum on primal nodes the local contributions */
8585:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8586:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8587:     VecGetArray(pcis->vec1_N,&array);
8588:     VecGetArray(pcbddc->vec1_P,&array2);
8589:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8590:     VecRestoreArray(pcbddc->vec1_P,&array2);
8591:     VecRestoreArray(pcis->vec1_N,&array);
8592:     VecSet(pcbddc->coarse_vec,0.);
8593:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8594:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8595:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8596:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
          /* NOTE(review): the array of vec1_P is obtained before VecPointwiseMult modifies that same vector and is
             restored only afterwards; the VecSetValues below presumably relies on array aliasing the scaled data - confirm */
8597:     VecGetArray(pcbddc->vec1_P,&array);
8598:     /* scale primal nodes (BDDC sums contributions) */
8599:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8600:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8601:     VecRestoreArray(pcbddc->vec1_P,&array);
8602:     VecAssemblyBegin(pcis->vec1_N);
8603:     VecAssemblyEnd(pcis->vec1_N);
8604:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8605:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8606:     /* global: \widetilde{B0}_B w_\Gamma */
8607:     if (zerodiag) {
8608:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
8609:       VecGetArray(dummy_vec,&array);
8610:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8611:       VecRestoreArray(dummy_vec,&array);
8612:     }
8613:     /* BDDC */
8614:     VecSet(pcis->vec1_D,0.);
8615:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

          /* compare the content of vec1_B with the starting vector saved in vec_check_B */
8617:     VecCopy(pcis->vec1_B,pcis->vec2_B);
8618:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8619:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8620:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8621:     for (i=0;i<pcbddc->benign_n;i++) {
8622:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8623:     }
          /* release everything created in this function */
8624:     PetscFree(p0_check);
8625:     VecDestroy(&vec_scale_P);
8626:     VecDestroy(&vec_check_B);
8627:     VecDestroy(&dummy_vec);
8628:     MatDestroy(&S_j);
8629:     MatDestroy(&B0_B);
8630:   }
8631:   return(0);
8632: }

8634:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
8635: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8636: {
8637:   Mat            At;
8638:   IS             rows;
8639:   PetscInt       rst,ren;
8641:   PetscLayout    rmap;

8644:   rst = ren = 0;
8645:   if (ccomm != MPI_COMM_NULL) {
8646:     PetscLayoutCreate(ccomm,&rmap);
8647:     PetscLayoutSetSize(rmap,A->rmap->N);
8648:     PetscLayoutSetBlockSize(rmap,1);
8649:     PetscLayoutSetUp(rmap);
8650:     PetscLayoutGetRange(rmap,&rst,&ren);
8651:   }
8652:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
8653:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
8654:   ISDestroy(&rows);

8656:   if (ccomm != MPI_COMM_NULL) {
8657:     Mat_MPIAIJ *a,*b;
8658:     IS         from,to;
8659:     Vec        gvec;
8660:     PetscInt   lsize;

8662:     MatCreate(ccomm,B);
8663:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
8664:     MatSetType(*B,MATAIJ);
8665:     PetscLayoutDestroy(&((*B)->rmap));
8666:     PetscLayoutSetUp((*B)->cmap);
8667:     a    = (Mat_MPIAIJ*)At->data;
8668:     b    = (Mat_MPIAIJ*)(*B)->data;
8669:     MPI_Comm_size(ccomm,&b->size);
8670:     MPI_Comm_rank(ccomm,&b->rank);
8671:     PetscObjectReference((PetscObject)a->A);
8672:     PetscObjectReference((PetscObject)a->B);
8673:     b->A = a->A;
8674:     b->B = a->B;

8676:     b->donotstash      = a->donotstash;
8677:     b->roworiented     = a->roworiented;
8678:     b->rowindices      = 0;
8679:     b->rowvalues       = 0;
8680:     b->getrowactive    = PETSC_FALSE;

8682:     (*B)->rmap         = rmap;
8683:     (*B)->factortype   = A->factortype;
8684:     (*B)->assembled    = PETSC_TRUE;
8685:     (*B)->insertmode   = NOT_SET_VALUES;
8686:     (*B)->preallocated = PETSC_TRUE;

8688:     if (a->colmap) {
8689: #if defined(PETSC_USE_CTABLE)
8690:       PetscTableCreateCopy(a->colmap,&b->colmap);
8691: #else
8692:       PetscMalloc1(At->cmap->N,&b->colmap);
8693:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
8694:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
8695: #endif
8696:     } else b->colmap = 0;
8697:     if (a->garray) {
8698:       PetscInt len;
8699:       len  = a->B->cmap->n;
8700:       PetscMalloc1(len+1,&b->garray);
8701:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
8702:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
8703:     } else b->garray = 0;

8705:     PetscObjectReference((PetscObject)a->lvec);
8706:     b->lvec = a->lvec;
8707:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

8709:     /* cannot use VecScatterCopy */
8710:     VecGetLocalSize(b->lvec,&lsize);
8711:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
8712:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
8713:     MatCreateVecs(*B,&gvec,NULL);
8714:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
8715:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
8716:     ISDestroy(&from);
8717:     ISDestroy(&to);
8718:     VecDestroy(&gvec);
8719:   }
8720:   MatDestroy(&At);
8721:   return(0);
8722: }