Actual source code: bddcprivate.c
petsc-3.12.0 2019-09-29
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1;
21: PetscInt ulw,i,nr,nc,n;
25: #if defined(PETSC_MISSING_LAPACK_GESVD)
26: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
27: #else
28: MatGetSize(A,&nr,&nc);
29: if (!nr || !nc) return(0);
31: /* workspace */
32: if (!work) {
33: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
34: PetscMalloc1(ulw,&uwork);
35: } else {
36: ulw = lw;
37: uwork = work;
38: }
39: n = PetscMin(nr,nc);
40: if (!rwork) {
41: PetscMalloc1(n,&sing);
42: } else {
43: sing = rwork;
44: }
46: /* SVD */
47: PetscMalloc1(nr*nr,&U);
48: PetscBLASIntCast(nr,&bM);
49: PetscBLASIntCast(nc,&bN);
50: PetscBLASIntCast(ulw,&lwork);
51: MatDenseGetArray(A,&data);
52: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
53: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
54: PetscFPTrapPop();
55: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
56: MatDenseRestoreArray(A,&data);
57: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
58: if (!rwork) {
59: PetscFree(sing);
60: }
61: if (!work) {
62: PetscFree(uwork);
63: }
64: /* create B */
65: if (!range) {
66: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
67: MatDenseGetArray(*B,&data);
68: PetscArraycpy(data,U+nr*i,(nr-i)*nr);
69: } else {
70: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
71: MatDenseGetArray(*B,&data);
72: PetscArraycpy(data,U,i*nr);
73: }
74: MatDenseRestoreArray(*B,&data);
75: PetscFree(U);
76: #endif
77: #else /* PETSC_USE_COMPLEX */
79: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
80: #endif
81: return(0);
82: }
84: /* TODO REMOVE: debug-only state, compiled only when PRINT_GDET is defined.
   inc and lev are used by PCBDDCComputeNedelecChangeEdge to build the name of the
   MATLAB dump file it writes: inc is post-incremented at every dump, lev is printed
   as the level tag and is only read in the visible portion of this file. */
85: #if defined(PRINT_GDET)
86: static int inc = 0;
87: static int lev = 0;
88: #endif
90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
91: {
93: Mat GE,GEd;
94: PetscInt rsize,csize,esize;
95: PetscScalar *ptr;
98: ISGetSize(edge,&esize);
99: if (!esize) return(0);
100: ISGetSize(extrow,&rsize);
101: ISGetSize(extcol,&csize);
103: /* gradients */
104: ptr = work + 5*esize;
105: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108: MatDestroy(&GE);
110: /* constants */
111: ptr += rsize*csize;
112: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115: MatDestroy(&GE);
116: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117: MatDestroy(&GEd);
119: if (corners) {
120: Mat GEc;
121: const PetscScalar *vals;
122: PetscScalar v;
124: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
125: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
126: MatDenseGetArrayRead(GEd,&vals);
127: /* v = PetscAbsScalar(vals[0]) */;
128: v = 1.;
129: cvals[0] = vals[0]/v;
130: cvals[1] = vals[1]/v;
131: MatDenseRestoreArrayRead(GEd,&vals);
132: MatScale(*GKins,1./v);
133: #if defined(PRINT_GDET)
134: {
135: PetscViewer viewer;
136: char filename[256];
137: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
138: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
139: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
140: PetscObjectSetName((PetscObject)GEc,"GEc");
141: MatView(GEc,viewer);
142: PetscObjectSetName((PetscObject)(*GKins),"GK");
143: MatView(*GKins,viewer);
144: PetscObjectSetName((PetscObject)GEd,"Gproj");
145: MatView(GEd,viewer);
146: PetscViewerDestroy(&viewer);
147: }
148: #endif
149: MatDestroy(&GEd);
150: MatDestroy(&GEc);
151: }
153: return(0);
154: }
156: PetscErrorCode PCBDDCNedelecSupport(PC pc)
157: {
158: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
159: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
160: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
161: Vec tvec;
162: PetscSF sfv;
163: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
164: MPI_Comm comm;
165: IS lned,primals,allprimals,nedfieldlocal;
166: IS *eedges,*extrows,*extcols,*alleedges;
167: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
168: PetscScalar *vals,*work;
169: PetscReal *rwork;
170: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
171: PetscInt ne,nv,Lv,order,n,field;
172: PetscInt n_neigh,*neigh,*n_shared,**shared;
173: PetscInt i,j,extmem,cum,maxsize,nee;
174: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
175: PetscInt *sfvleaves,*sfvroots;
176: PetscInt *corners,*cedges;
177: PetscInt *ecount,**eneighs,*vcount,**vneighs;
178: #if defined(PETSC_USE_DEBUG)
179: PetscInt *emarks;
180: #endif
181: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
182: PetscErrorCode ierr;
185: /* If the discrete gradient is defined for a subset of dofs and global is true,
186: it assumes G is given in global ordering for all the dofs.
187: Otherwise, the ordering is global for the Nedelec field */
188: order = pcbddc->nedorder;
189: conforming = pcbddc->conforming;
190: field = pcbddc->nedfield;
191: global = pcbddc->nedglobal;
192: setprimal = PETSC_FALSE;
193: print = PETSC_FALSE;
194: singular = PETSC_FALSE;
196: /* Command line customization */
197: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
198: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
199: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
200: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
201: /* print debug info TODO: to be removed */
202: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
203: PetscOptionsEnd();
205: /* Return if there are no edges in the decomposition and the problem is not singular */
206: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
207: ISLocalToGlobalMappingGetSize(al2g,&n);
208: PetscObjectGetComm((PetscObject)pc,&comm);
209: if (!singular) {
210: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
211: lrc[0] = PETSC_FALSE;
212: for (i=0;i<n;i++) {
213: if (PetscRealPart(vals[i]) > 2.) {
214: lrc[0] = PETSC_TRUE;
215: break;
216: }
217: }
218: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
219: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
220: if (!lrc[1]) return(0);
221: }
223: /* Get Nedelec field */
224: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
225: if (pcbddc->n_ISForDofsLocal && field >= 0) {
226: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227: nedfieldlocal = pcbddc->ISForDofsLocal[field];
228: ISGetLocalSize(nedfieldlocal,&ne);
229: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230: ne = n;
231: nedfieldlocal = NULL;
232: global = PETSC_TRUE;
233: } else if (field == PETSC_DECIDE) {
234: PetscInt rst,ren,*idx;
236: PetscArrayzero(matis->sf_leafdata,n);
237: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
238: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239: for (i=rst;i<ren;i++) {
240: PetscInt nc;
242: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245: }
246: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248: PetscMalloc1(n,&idx);
249: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251: } else {
252: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253: }
255: /* Sanity checks */
256: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
260: /* Just set primal dofs and return */
261: if (setprimal) {
262: IS enedfieldlocal;
263: PetscInt *eidxs;
265: PetscMalloc1(ne,&eidxs);
266: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267: if (nedfieldlocal) {
268: ISGetIndices(nedfieldlocal,&idxs);
269: for (i=0,cum=0;i<ne;i++) {
270: if (PetscRealPart(vals[idxs[i]]) > 2.) {
271: eidxs[cum++] = idxs[i];
272: }
273: }
274: ISRestoreIndices(nedfieldlocal,&idxs);
275: } else {
276: for (i=0,cum=0;i<ne;i++) {
277: if (PetscRealPart(vals[i]) > 2.) {
278: eidxs[cum++] = i;
279: }
280: }
281: }
282: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285: PetscFree(eidxs);
286: ISDestroy(&nedfieldlocal);
287: ISDestroy(&enedfieldlocal);
288: return(0);
289: }
291: /* Compute some l2g maps */
292: if (nedfieldlocal) {
293: IS is;
295: /* need to map from the local Nedelec field to local numbering */
296: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299: ISLocalToGlobalMappingCreateIS(is,&al2g);
300: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301: if (global) {
302: PetscObjectReference((PetscObject)al2g);
303: el2g = al2g;
304: } else {
305: IS gis;
307: ISRenumber(is,NULL,NULL,&gis);
308: ISLocalToGlobalMappingCreateIS(gis,&el2g);
309: ISDestroy(&gis);
310: }
311: ISDestroy(&is);
312: } else {
313: /* restore default */
314: pcbddc->nedfield = -1;
315: /* one ref for the destruction of al2g, one for el2g */
316: PetscObjectReference((PetscObject)al2g);
317: PetscObjectReference((PetscObject)al2g);
318: el2g = al2g;
319: fl2g = NULL;
320: }
322: /* Start communication to drop connections for interior edges (for cc analysis only) */
323: PetscArrayzero(matis->sf_leafdata,n);
324: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
325: if (nedfieldlocal) {
326: ISGetIndices(nedfieldlocal,&idxs);
327: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328: ISRestoreIndices(nedfieldlocal,&idxs);
329: } else {
330: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331: }
332: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
335: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338: if (global) {
339: PetscInt rst;
341: MatGetOwnershipRange(G,&rst,NULL);
342: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343: if (matis->sf_rootdata[i] < 2) {
344: matis->sf_rootdata[cum++] = i + rst;
345: }
346: }
347: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349: } else {
350: PetscInt *tbz;
352: PetscMalloc1(ne,&tbz);
353: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355: ISGetIndices(nedfieldlocal,&idxs);
356: for (i=0,cum=0;i<ne;i++)
357: if (matis->sf_leafdata[idxs[i]] == 1)
358: tbz[cum++] = i;
359: ISRestoreIndices(nedfieldlocal,&idxs);
360: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362: PetscFree(tbz);
363: }
364: } else { /* we need the entire G to infer the nullspace */
365: PetscObjectReference((PetscObject)pcbddc->discretegradient);
366: G = pcbddc->discretegradient;
367: }
369: /* Extract subdomain relevant rows of G */
370: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374: ISDestroy(&lned);
375: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376: MatDestroy(&lGall);
377: MatISGetLocalMat(lGis,&lG);
379: /* SF for nodal dofs communications */
380: MatGetLocalSize(G,NULL,&Lv);
381: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382: PetscObjectReference((PetscObject)vl2g);
383: ISLocalToGlobalMappingGetSize(vl2g,&nv);
384: PetscSFCreate(comm,&sfv);
385: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388: i = singular ? 2 : 1;
389: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
391: /* Destroy temporary G created in MATIS format and modified G */
392: PetscObjectReference((PetscObject)lG);
393: MatDestroy(&lGis);
394: MatDestroy(&G);
396: if (print) {
397: PetscObjectSetName((PetscObject)lG,"initial_lG");
398: MatView(lG,NULL);
399: }
401: /* Save lG for values insertion in change of basis */
402: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
404: /* Analyze the edge-nodes connections (duplicate lG) */
405: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407: PetscBTCreate(nv,&btv);
408: PetscBTCreate(ne,&bte);
409: PetscBTCreate(ne,&btb);
410: PetscBTCreate(ne,&btbd);
411: PetscBTCreate(nv,&btvcand);
412: /* need to import the boundary specification to ensure the
413: proper detection of coarse edges' endpoints */
414: if (pcbddc->DirichletBoundariesLocal) {
415: IS is;
417: if (fl2g) {
418: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419: } else {
420: is = pcbddc->DirichletBoundariesLocal;
421: }
422: ISGetLocalSize(is,&cum);
423: ISGetIndices(is,&idxs);
424: for (i=0;i<cum;i++) {
425: if (idxs[i] >= 0) {
426: PetscBTSet(btb,idxs[i]);
427: PetscBTSet(btbd,idxs[i]);
428: }
429: }
430: ISRestoreIndices(is,&idxs);
431: if (fl2g) {
432: ISDestroy(&is);
433: }
434: }
435: if (pcbddc->NeumannBoundariesLocal) {
436: IS is;
438: if (fl2g) {
439: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440: } else {
441: is = pcbddc->NeumannBoundariesLocal;
442: }
443: ISGetLocalSize(is,&cum);
444: ISGetIndices(is,&idxs);
445: for (i=0;i<cum;i++) {
446: if (idxs[i] >= 0) {
447: PetscBTSet(btb,idxs[i]);
448: }
449: }
450: ISRestoreIndices(is,&idxs);
451: if (fl2g) {
452: ISDestroy(&is);
453: }
454: }
456: /* Count neighs per dof */
457: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
458: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
460: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
461: for proper detection of coarse edges' endpoints */
462: PetscBTCreate(ne,&btee);
463: for (i=0;i<ne;i++) {
464: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
465: PetscBTSet(btee,i);
466: }
467: }
468: PetscMalloc1(ne,&marks);
469: if (!conforming) {
470: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
471: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
472: }
473: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
474: MatSeqAIJGetArray(lGe,&vals);
475: cum = 0;
476: for (i=0;i<ne;i++) {
477: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
478: if (!PetscBTLookup(btee,i)) {
479: marks[cum++] = i;
480: continue;
481: }
482: /* set badly connected edge dofs as primal */
483: if (!conforming) {
484: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
485: marks[cum++] = i;
486: PetscBTSet(bte,i);
487: for (j=ii[i];j<ii[i+1];j++) {
488: PetscBTSet(btv,jj[j]);
489: }
490: } else {
491: /* every edge dofs should be connected trough a certain number of nodal dofs
492: to other edge dofs belonging to coarse edges
493: - at most 2 endpoints
494: - order-1 interior nodal dofs
495: - no undefined nodal dofs (nconn < order)
496: */
497: PetscInt ends = 0,ints = 0, undef = 0;
498: for (j=ii[i];j<ii[i+1];j++) {
499: PetscInt v = jj[j],k;
500: PetscInt nconn = iit[v+1]-iit[v];
501: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
502: if (nconn > order) ends++;
503: else if (nconn == order) ints++;
504: else undef++;
505: }
506: if (undef || ends > 2 || ints != order -1) {
507: marks[cum++] = i;
508: PetscBTSet(bte,i);
509: for (j=ii[i];j<ii[i+1];j++) {
510: PetscBTSet(btv,jj[j]);
511: }
512: }
513: }
514: }
515: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
516: if (!order && ii[i+1] != ii[i]) {
517: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
518: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
519: }
520: }
521: PetscBTDestroy(&btee);
522: MatSeqAIJRestoreArray(lGe,&vals);
523: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
524: if (!conforming) {
525: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
526: MatDestroy(&lGt);
527: }
528: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
530: /* identify splitpoints and corner candidates */
531: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
532: if (print) {
533: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
534: MatView(lGe,NULL);
535: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
536: MatView(lGt,NULL);
537: }
538: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
539: MatSeqAIJGetArray(lGt,&vals);
540: for (i=0;i<nv;i++) {
541: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
542: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
543: if (!order) { /* variable order */
544: PetscReal vorder = 0.;
546: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
547: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
548: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
549: ord = 1;
550: }
551: #if defined(PETSC_USE_DEBUG)
552: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
553: #endif
554: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
555: if (PetscBTLookup(btbd,jj[j])) {
556: bdir = PETSC_TRUE;
557: break;
558: }
559: if (vc != ecount[jj[j]]) {
560: sneighs = PETSC_FALSE;
561: } else {
562: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
563: for (k=0;k<vc;k++) {
564: if (vn[k] != en[k]) {
565: sneighs = PETSC_FALSE;
566: break;
567: }
568: }
569: }
570: }
571: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
572: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
573: PetscBTSet(btv,i);
574: } else if (test == ord) {
575: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
576: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
577: PetscBTSet(btv,i);
578: } else {
579: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
580: PetscBTSet(btvcand,i);
581: }
582: }
583: }
584: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
585: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
586: PetscBTDestroy(&btbd);
588: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
589: if (order != 1) {
590: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
591: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
592: for (i=0;i<nv;i++) {
593: if (PetscBTLookup(btvcand,i)) {
594: PetscBool found = PETSC_FALSE;
595: for (j=ii[i];j<ii[i+1] && !found;j++) {
596: PetscInt k,e = jj[j];
597: if (PetscBTLookup(bte,e)) continue;
598: for (k=iit[e];k<iit[e+1];k++) {
599: PetscInt v = jjt[k];
600: if (v != i && PetscBTLookup(btvcand,v)) {
601: found = PETSC_TRUE;
602: break;
603: }
604: }
605: }
606: if (!found) {
607: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
608: PetscBTClear(btvcand,i);
609: } else {
610: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
611: }
612: }
613: }
614: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
615: }
616: MatSeqAIJRestoreArray(lGt,&vals);
617: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
618: MatDestroy(&lGe);
620: /* Get the local G^T explicitly */
621: MatDestroy(&lGt);
622: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
623: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
625: /* Mark interior nodal dofs */
626: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
627: PetscBTCreate(nv,&btvi);
628: for (i=1;i<n_neigh;i++) {
629: for (j=0;j<n_shared[i];j++) {
630: PetscBTSet(btvi,shared[i][j]);
631: }
632: }
633: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
635: /* communicate corners and splitpoints */
636: PetscMalloc1(nv,&vmarks);
637: PetscArrayzero(sfvleaves,nv);
638: PetscArrayzero(sfvroots,Lv);
639: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
641: if (print) {
642: IS tbz;
644: cum = 0;
645: for (i=0;i<nv;i++)
646: if (sfvleaves[i])
647: vmarks[cum++] = i;
649: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
650: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
651: ISView(tbz,NULL);
652: ISDestroy(&tbz);
653: }
655: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
657: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
658: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
660: /* Zero rows of lGt corresponding to identified corners
661: and interior nodal dofs */
662: cum = 0;
663: for (i=0;i<nv;i++) {
664: if (sfvleaves[i]) {
665: vmarks[cum++] = i;
666: PetscBTSet(btv,i);
667: }
668: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
669: }
670: PetscBTDestroy(&btvi);
671: if (print) {
672: IS tbz;
674: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
675: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
676: ISView(tbz,NULL);
677: ISDestroy(&tbz);
678: }
679: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
680: PetscFree(vmarks);
681: PetscSFDestroy(&sfv);
682: PetscFree2(sfvleaves,sfvroots);
684: /* Recompute G */
685: MatDestroy(&lG);
686: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
687: if (print) {
688: PetscObjectSetName((PetscObject)lG,"used_lG");
689: MatView(lG,NULL);
690: PetscObjectSetName((PetscObject)lGt,"used_lGt");
691: MatView(lGt,NULL);
692: }
694: /* Get primal dofs (if any) */
695: cum = 0;
696: for (i=0;i<ne;i++) {
697: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
698: }
699: if (fl2g) {
700: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
701: }
702: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
703: if (print) {
704: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
705: ISView(primals,NULL);
706: }
707: PetscBTDestroy(&bte);
708: /* TODO: what if the user passed in some of them ? */
709: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
710: ISDestroy(&primals);
712: /* Compute edge connectivity */
713: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
714: MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
715: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
716: if (fl2g) {
717: PetscBT btf;
718: PetscInt *iia,*jja,*iiu,*jju;
719: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
721: /* create CSR for all local dofs */
722: PetscMalloc1(n+1,&iia);
723: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
724: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
725: iiu = pcbddc->mat_graph->xadj;
726: jju = pcbddc->mat_graph->adjncy;
727: } else if (pcbddc->use_local_adj) {
728: rest = PETSC_TRUE;
729: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
730: } else {
731: free = PETSC_TRUE;
732: PetscMalloc2(n+1,&iiu,n,&jju);
733: iiu[0] = 0;
734: for (i=0;i<n;i++) {
735: iiu[i+1] = i+1;
736: jju[i] = -1;
737: }
738: }
740: /* import sizes of CSR */
741: iia[0] = 0;
742: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
744: /* overwrite entries corresponding to the Nedelec field */
745: PetscBTCreate(n,&btf);
746: ISGetIndices(nedfieldlocal,&idxs);
747: for (i=0;i<ne;i++) {
748: PetscBTSet(btf,idxs[i]);
749: iia[idxs[i]+1] = ii[i+1]-ii[i];
750: }
752: /* iia in CSR */
753: for (i=0;i<n;i++) iia[i+1] += iia[i];
755: /* jja in CSR */
756: PetscMalloc1(iia[n],&jja);
757: for (i=0;i<n;i++)
758: if (!PetscBTLookup(btf,i))
759: for (j=0;j<iiu[i+1]-iiu[i];j++)
760: jja[iia[i]+j] = jju[iiu[i]+j];
762: /* map edge dofs connectivity */
763: if (jj) {
764: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
765: for (i=0;i<ne;i++) {
766: PetscInt e = idxs[i];
767: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
768: }
769: }
770: ISRestoreIndices(nedfieldlocal,&idxs);
771: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
772: if (rest) {
773: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
774: }
775: if (free) {
776: PetscFree2(iiu,jju);
777: }
778: PetscBTDestroy(&btf);
779: } else {
780: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
781: }
783: /* Analyze interface for edge dofs */
784: PCBDDCAnalyzeInterface(pc);
785: pcbddc->mat_graph->twodim = PETSC_FALSE;
787: /* Get coarse edges in the edge space */
788: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
789: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
791: if (fl2g) {
792: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
793: PetscMalloc1(nee,&eedges);
794: for (i=0;i<nee;i++) {
795: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
796: }
797: } else {
798: eedges = alleedges;
799: primals = allprimals;
800: }
802: /* Mark fine edge dofs with their coarse edge id */
803: PetscArrayzero(marks,ne);
804: ISGetLocalSize(primals,&cum);
805: ISGetIndices(primals,&idxs);
806: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
807: ISRestoreIndices(primals,&idxs);
808: if (print) {
809: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
810: ISView(primals,NULL);
811: }
813: maxsize = 0;
814: for (i=0;i<nee;i++) {
815: PetscInt size,mark = i+1;
817: ISGetLocalSize(eedges[i],&size);
818: ISGetIndices(eedges[i],&idxs);
819: for (j=0;j<size;j++) marks[idxs[j]] = mark;
820: ISRestoreIndices(eedges[i],&idxs);
821: maxsize = PetscMax(maxsize,size);
822: }
824: /* Find coarse edge endpoints */
825: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
826: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
827: for (i=0;i<nee;i++) {
828: PetscInt mark = i+1,size;
830: ISGetLocalSize(eedges[i],&size);
831: if (!size && nedfieldlocal) continue;
832: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
833: ISGetIndices(eedges[i],&idxs);
834: if (print) {
835: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
836: ISView(eedges[i],NULL);
837: }
838: for (j=0;j<size;j++) {
839: PetscInt k, ee = idxs[j];
840: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
841: for (k=ii[ee];k<ii[ee+1];k++) {
842: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
843: if (PetscBTLookup(btv,jj[k])) {
844: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
845: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
846: PetscInt k2;
847: PetscBool corner = PETSC_FALSE;
848: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
849: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
850: /* it's a corner if either is connected with an edge dof belonging to a different cc or
851: if the edge dof lie on the natural part of the boundary */
852: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
853: corner = PETSC_TRUE;
854: break;
855: }
856: }
857: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
858: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
859: PetscBTSet(btv,jj[k]);
860: } else {
861: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
862: }
863: }
864: }
865: }
866: ISRestoreIndices(eedges[i],&idxs);
867: }
868: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
869: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
870: PetscBTDestroy(&btb);
872: /* Reset marked primal dofs */
873: ISGetLocalSize(primals,&cum);
874: ISGetIndices(primals,&idxs);
875: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
876: ISRestoreIndices(primals,&idxs);
878: /* Now use the initial lG */
879: MatDestroy(&lG);
880: MatDestroy(&lGt);
881: lG = lGinit;
882: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
884: /* Compute extended cols indices */
885: PetscBTCreate(nv,&btvc);
886: PetscBTCreate(nee,&bter);
887: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
888: MatSeqAIJGetMaxRowNonzeros(lG,&i);
889: i *= maxsize;
890: PetscCalloc1(nee,&extcols);
891: PetscMalloc2(i,&extrow,i,&gidxs);
892: eerr = PETSC_FALSE;
893: for (i=0;i<nee;i++) {
894: PetscInt size,found = 0;
896: cum = 0;
897: ISGetLocalSize(eedges[i],&size);
898: if (!size && nedfieldlocal) continue;
899: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
900: ISGetIndices(eedges[i],&idxs);
901: PetscBTMemzero(nv,btvc);
902: for (j=0;j<size;j++) {
903: PetscInt k,ee = idxs[j];
904: for (k=ii[ee];k<ii[ee+1];k++) {
905: PetscInt vv = jj[k];
906: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
907: else if (!PetscBTLookupSet(btvc,vv)) found++;
908: }
909: }
910: ISRestoreIndices(eedges[i],&idxs);
911: PetscSortRemoveDupsInt(&cum,extrow);
912: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
913: PetscSortIntWithArray(cum,gidxs,extrow);
914: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
915: /* it may happen that endpoints are not defined at this point
916: if it is the case, mark this edge for a second pass */
917: if (cum != size -1 || found != 2) {
918: PetscBTSet(bter,i);
919: if (print) {
920: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
921: ISView(eedges[i],NULL);
922: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
923: ISView(extcols[i],NULL);
924: }
925: eerr = PETSC_TRUE;
926: }
927: }
928: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
929: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
930: if (done) {
931: PetscInt *newprimals;
933: PetscMalloc1(ne,&newprimals);
934: ISGetLocalSize(primals,&cum);
935: ISGetIndices(primals,&idxs);
936: PetscArraycpy(newprimals,idxs,cum);
937: ISRestoreIndices(primals,&idxs);
938: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
939: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
940: for (i=0;i<nee;i++) {
941: PetscBool has_candidates = PETSC_FALSE;
942: if (PetscBTLookup(bter,i)) {
943: PetscInt size,mark = i+1;
945: ISGetLocalSize(eedges[i],&size);
946: ISGetIndices(eedges[i],&idxs);
947: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
948: for (j=0;j<size;j++) {
949: PetscInt k,ee = idxs[j];
950: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
951: for (k=ii[ee];k<ii[ee+1];k++) {
952: /* set all candidates located on the edge as corners */
953: if (PetscBTLookup(btvcand,jj[k])) {
954: PetscInt k2,vv = jj[k];
955: has_candidates = PETSC_TRUE;
956: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
957: PetscBTSet(btv,vv);
958: /* set all edge dofs connected to candidate as primals */
959: for (k2=iit[vv];k2<iit[vv+1];k2++) {
960: if (marks[jjt[k2]] == mark) {
961: PetscInt k3,ee2 = jjt[k2];
962: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
963: newprimals[cum++] = ee2;
964: /* finally set the new corners */
965: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
966: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
967: PetscBTSet(btv,jj[k3]);
968: }
969: }
970: }
971: } else {
972: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
973: }
974: }
975: }
976: if (!has_candidates) { /* circular edge */
977: PetscInt k, ee = idxs[0],*tmarks;
979: PetscCalloc1(ne,&tmarks);
980: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
981: for (k=ii[ee];k<ii[ee+1];k++) {
982: PetscInt k2;
983: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
984: PetscBTSet(btv,jj[k]);
985: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
986: }
987: for (j=0;j<size;j++) {
988: if (tmarks[idxs[j]] > 1) {
989: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
990: newprimals[cum++] = idxs[j];
991: }
992: }
993: PetscFree(tmarks);
994: }
995: ISRestoreIndices(eedges[i],&idxs);
996: }
997: ISDestroy(&extcols[i]);
998: }
999: PetscFree(extcols);
1000: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1001: PetscSortRemoveDupsInt(&cum,newprimals);
1002: if (fl2g) {
1003: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1004: ISDestroy(&primals);
1005: for (i=0;i<nee;i++) {
1006: ISDestroy(&eedges[i]);
1007: }
1008: PetscFree(eedges);
1009: }
1010: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1011: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1012: PetscFree(newprimals);
1013: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1014: ISDestroy(&primals);
1015: PCBDDCAnalyzeInterface(pc);
1016: pcbddc->mat_graph->twodim = PETSC_FALSE;
1017: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1018: if (fl2g) {
1019: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1020: PetscMalloc1(nee,&eedges);
1021: for (i=0;i<nee;i++) {
1022: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1023: }
1024: } else {
1025: eedges = alleedges;
1026: primals = allprimals;
1027: }
1028: PetscCalloc1(nee,&extcols);
1030: /* Mark again */
1031: PetscArrayzero(marks,ne);
1032: for (i=0;i<nee;i++) {
1033: PetscInt size,mark = i+1;
1035: ISGetLocalSize(eedges[i],&size);
1036: ISGetIndices(eedges[i],&idxs);
1037: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1038: ISRestoreIndices(eedges[i],&idxs);
1039: }
1040: if (print) {
1041: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1042: ISView(primals,NULL);
1043: }
1045: /* Recompute extended cols */
1046: eerr = PETSC_FALSE;
1047: for (i=0;i<nee;i++) {
1048: PetscInt size;
1050: cum = 0;
1051: ISGetLocalSize(eedges[i],&size);
1052: if (!size && nedfieldlocal) continue;
1053: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1054: ISGetIndices(eedges[i],&idxs);
1055: for (j=0;j<size;j++) {
1056: PetscInt k,ee = idxs[j];
1057: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1058: }
1059: ISRestoreIndices(eedges[i],&idxs);
1060: PetscSortRemoveDupsInt(&cum,extrow);
1061: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1062: PetscSortIntWithArray(cum,gidxs,extrow);
1063: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1064: if (cum != size -1) {
1065: if (print) {
1066: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1067: ISView(eedges[i],NULL);
1068: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1069: ISView(extcols[i],NULL);
1070: }
1071: eerr = PETSC_TRUE;
1072: }
1073: }
1074: }
1075: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1076: PetscFree2(extrow,gidxs);
1077: PetscBTDestroy(&bter);
1078: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1079: /* an error should not occur at this point */
1080: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1082: /* Check the number of endpoints */
1083: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1084: PetscMalloc1(2*nee,&corners);
1085: PetscMalloc1(nee,&cedges);
1086: for (i=0;i<nee;i++) {
1087: PetscInt size, found = 0, gc[2];
1089: /* init with defaults */
1090: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1091: ISGetLocalSize(eedges[i],&size);
1092: if (!size && nedfieldlocal) continue;
1093: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1094: ISGetIndices(eedges[i],&idxs);
1095: PetscBTMemzero(nv,btvc);
1096: for (j=0;j<size;j++) {
1097: PetscInt k,ee = idxs[j];
1098: for (k=ii[ee];k<ii[ee+1];k++) {
1099: PetscInt vv = jj[k];
1100: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1101: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1102: corners[i*2+found++] = vv;
1103: }
1104: }
1105: }
1106: if (found != 2) {
1107: PetscInt e;
1108: if (fl2g) {
1109: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1110: } else {
1111: e = idxs[0];
1112: }
1113: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1114: }
1116: /* get primal dof index on this coarse edge */
1117: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1118: if (gc[0] > gc[1]) {
1119: PetscInt swap = corners[2*i];
1120: corners[2*i] = corners[2*i+1];
1121: corners[2*i+1] = swap;
1122: }
1123: cedges[i] = idxs[size-1];
1124: ISRestoreIndices(eedges[i],&idxs);
1125: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1126: }
1127: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1128: PetscBTDestroy(&btvc);
1130: #if defined(PETSC_USE_DEBUG)
1131: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1132: not interfere with neighbouring coarse edges */
1133: PetscMalloc1(nee+1,&emarks);
1134: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1135: for (i=0;i<nv;i++) {
1136: PetscInt emax = 0,eemax = 0;
1138: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1139: PetscArrayzero(emarks,nee+1);
1140: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1141: for (j=1;j<nee+1;j++) {
1142: if (emax < emarks[j]) {
1143: emax = emarks[j];
1144: eemax = j;
1145: }
1146: }
1147: /* not relevant for edges */
1148: if (!eemax) continue;
1150: for (j=ii[i];j<ii[i+1];j++) {
1151: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1152: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1153: }
1154: }
1155: }
1156: PetscFree(emarks);
1157: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1158: #endif
1160: /* Compute extended rows indices for edge blocks of the change of basis */
1161: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1162: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1163: extmem *= maxsize;
1164: PetscMalloc1(extmem*nee,&extrow);
1165: PetscMalloc1(nee,&extrows);
1166: PetscCalloc1(nee,&extrowcum);
1167: for (i=0;i<nv;i++) {
1168: PetscInt mark = 0,size,start;
1170: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1171: for (j=ii[i];j<ii[i+1];j++)
1172: if (marks[jj[j]] && !mark)
1173: mark = marks[jj[j]];
1175: /* not relevant */
1176: if (!mark) continue;
1178: /* import extended row */
1179: mark--;
1180: start = mark*extmem+extrowcum[mark];
1181: size = ii[i+1]-ii[i];
1182: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1183: PetscArraycpy(extrow+start,jj+ii[i],size);
1184: extrowcum[mark] += size;
1185: }
1186: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1187: MatDestroy(&lGt);
1188: PetscFree(marks);
1190: /* Compress extrows */
1191: cum = 0;
1192: for (i=0;i<nee;i++) {
1193: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1194: PetscSortRemoveDupsInt(&size,start);
1195: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1196: cum = PetscMax(cum,size);
1197: }
1198: PetscFree(extrowcum);
1199: PetscBTDestroy(&btv);
1200: PetscBTDestroy(&btvcand);
1202: /* Workspace for lapack inner calls and VecSetValues */
1203: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1205: /* Create change of basis matrix (preallocation can be improved) */
1206: MatCreate(comm,&T);
1207: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1208: pc->pmat->rmap->N,pc->pmat->rmap->N);
1209: MatSetType(T,MATAIJ);
1210: MatSeqAIJSetPreallocation(T,10,NULL);
1211: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1212: MatSetLocalToGlobalMapping(T,al2g,al2g);
1213: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1214: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1215: ISLocalToGlobalMappingDestroy(&al2g);
1217: /* Defaults to identity */
1218: MatCreateVecs(pc->pmat,&tvec,NULL);
1219: VecSet(tvec,1.0);
1220: MatDiagonalSet(T,tvec,INSERT_VALUES);
1221: VecDestroy(&tvec);
1223: /* Create discrete gradient for the coarser level if needed */
1224: MatDestroy(&pcbddc->nedcG);
1225: ISDestroy(&pcbddc->nedclocal);
1226: if (pcbddc->current_level < pcbddc->max_levels) {
1227: ISLocalToGlobalMapping cel2g,cvl2g;
1228: IS wis,gwis;
1229: PetscInt cnv,cne;
1231: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1232: if (fl2g) {
1233: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1234: } else {
1235: PetscObjectReference((PetscObject)wis);
1236: pcbddc->nedclocal = wis;
1237: }
1238: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1239: ISDestroy(&wis);
1240: ISRenumber(gwis,NULL,&cne,&wis);
1241: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1242: ISDestroy(&wis);
1243: ISDestroy(&gwis);
1245: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1246: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1247: ISDestroy(&wis);
1248: ISRenumber(gwis,NULL,&cnv,&wis);
1249: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1250: ISDestroy(&wis);
1251: ISDestroy(&gwis);
1253: MatCreate(comm,&pcbddc->nedcG);
1254: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1255: MatSetType(pcbddc->nedcG,MATAIJ);
1256: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1257: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1258: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1259: ISLocalToGlobalMappingDestroy(&cel2g);
1260: ISLocalToGlobalMappingDestroy(&cvl2g);
1261: }
1262: ISLocalToGlobalMappingDestroy(&vl2g);
1264: #if defined(PRINT_GDET)
1265: inc = 0;
1266: lev = pcbddc->current_level;
1267: #endif
1269: /* Insert values in the change of basis matrix */
1270: for (i=0;i<nee;i++) {
1271: Mat Gins = NULL, GKins = NULL;
1272: IS cornersis = NULL;
1273: PetscScalar cvals[2];
1275: if (pcbddc->nedcG) {
1276: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1277: }
1278: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1279: if (Gins && GKins) {
1280: const PetscScalar *data;
1281: const PetscInt *rows,*cols;
1282: PetscInt nrh,nch,nrc,ncc;
1284: ISGetIndices(eedges[i],&cols);
1285: /* H1 */
1286: ISGetIndices(extrows[i],&rows);
1287: MatGetSize(Gins,&nrh,&nch);
1288: MatDenseGetArrayRead(Gins,&data);
1289: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1290: MatDenseRestoreArrayRead(Gins,&data);
1291: ISRestoreIndices(extrows[i],&rows);
1292: /* complement */
1293: MatGetSize(GKins,&nrc,&ncc);
1294: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1295: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1296: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1297: MatDenseGetArrayRead(GKins,&data);
1298: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1299: MatDenseRestoreArrayRead(GKins,&data);
1301: /* coarse discrete gradient */
1302: if (pcbddc->nedcG) {
1303: PetscInt cols[2];
1305: cols[0] = 2*i;
1306: cols[1] = 2*i+1;
1307: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1308: }
1309: ISRestoreIndices(eedges[i],&cols);
1310: }
1311: ISDestroy(&extrows[i]);
1312: ISDestroy(&extcols[i]);
1313: ISDestroy(&cornersis);
1314: MatDestroy(&Gins);
1315: MatDestroy(&GKins);
1316: }
1317: ISLocalToGlobalMappingDestroy(&el2g);
1319: /* Start assembling */
1320: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1321: if (pcbddc->nedcG) {
1322: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1323: }
1325: /* Free */
1326: if (fl2g) {
1327: ISDestroy(&primals);
1328: for (i=0;i<nee;i++) {
1329: ISDestroy(&eedges[i]);
1330: }
1331: PetscFree(eedges);
1332: }
1334: /* hack mat_graph with primal dofs on the coarse edges */
1335: {
1336: PCBDDCGraph graph = pcbddc->mat_graph;
1337: PetscInt *oqueue = graph->queue;
1338: PetscInt *ocptr = graph->cptr;
1339: PetscInt ncc,*idxs;
1341: /* find first primal edge */
1342: if (pcbddc->nedclocal) {
1343: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1344: } else {
1345: if (fl2g) {
1346: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1347: }
1348: idxs = cedges;
1349: }
1350: cum = 0;
1351: while (cum < nee && cedges[cum] < 0) cum++;
1353: /* adapt connected components */
1354: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1355: graph->cptr[0] = 0;
1356: for (i=0,ncc=0;i<graph->ncc;i++) {
1357: PetscInt lc = ocptr[i+1]-ocptr[i];
1358: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1359: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1360: graph->queue[graph->cptr[ncc]] = cedges[cum];
1361: ncc++;
1362: lc--;
1363: cum++;
1364: while (cum < nee && cedges[cum] < 0) cum++;
1365: }
1366: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1367: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1368: ncc++;
1369: }
1370: graph->ncc = ncc;
1371: if (pcbddc->nedclocal) {
1372: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1373: }
1374: PetscFree2(ocptr,oqueue);
1375: }
1376: ISLocalToGlobalMappingDestroy(&fl2g);
1377: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1378: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1379: MatDestroy(&conn);
1381: ISDestroy(&nedfieldlocal);
1382: PetscFree(extrow);
1383: PetscFree2(work,rwork);
1384: PetscFree(corners);
1385: PetscFree(cedges);
1386: PetscFree(extrows);
1387: PetscFree(extcols);
1388: MatDestroy(&lG);
1390: /* Complete assembling */
1391: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1392: if (pcbddc->nedcG) {
1393: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1394: #if 0
1395: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1396: MatView(pcbddc->nedcG,NULL);
1397: #endif
1398: }
1400: /* set change of basis */
1401: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1402: MatDestroy(&T);
1404: return(0);
1405: }
1407: /* the near-null space of BDDC carries information on quadrature weights,
1408: and these can be collinear -> so cheat with MatNullSpaceCreate
1409: and create a suitable set of basis vectors first */
1410: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1411: {
1413: PetscInt i;
1416: for (i=0;i<nvecs;i++) {
1417: PetscInt first,last;
1419: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1420: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1421: if (i>=first && i < last) {
1422: PetscScalar *data;
1423: VecGetArray(quad_vecs[i],&data);
1424: if (!has_const) {
1425: data[i-first] = 1.;
1426: } else {
1427: data[2*i-first] = 1./PetscSqrtReal(2.);
1428: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1429: }
1430: VecRestoreArray(quad_vecs[i],&data);
1431: }
1432: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1433: }
1434: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1435: for (i=0;i<nvecs;i++) { /* reset vectors */
1436: PetscInt first,last;
1437: VecLockReadPop(quad_vecs[i]);
1438: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1439: if (i>=first && i < last) {
1440: PetscScalar *data;
1441: VecGetArray(quad_vecs[i],&data);
1442: if (!has_const) {
1443: data[i-first] = 0.;
1444: } else {
1445: data[2*i-first] = 0.;
1446: data[2*i-first+1] = 0.;
1447: }
1448: VecRestoreArray(quad_vecs[i],&data);
1449: }
1450: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1451: VecLockReadPush(quad_vecs[i]);
1452: }
1453: return(0);
1454: }
1456: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1457: {
1458: Mat loc_divudotp;
1459: Vec p,v,vins,quad_vec,*quad_vecs;
1460: ISLocalToGlobalMapping map;
1461: PetscScalar *vals;
1462: const PetscScalar *array;
1463: PetscInt i,maxneighs,maxsize,*gidxs;
1464: PetscInt n_neigh,*neigh,*n_shared,**shared;
1465: PetscMPIInt rank;
1466: PetscErrorCode ierr;
1469: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1470: MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1471: if (!maxneighs) {
1472: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1473: *nnsp = NULL;
1474: return(0);
1475: }
1476: maxsize = 0;
1477: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1478: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1479: /* create vectors to hold quadrature weights */
1480: MatCreateVecs(A,&quad_vec,NULL);
1481: if (!transpose) {
1482: MatGetLocalToGlobalMapping(A,&map,NULL);
1483: } else {
1484: MatGetLocalToGlobalMapping(A,NULL,&map);
1485: }
1486: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1487: VecDestroy(&quad_vec);
1488: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1489: for (i=0;i<maxneighs;i++) {
1490: VecLockReadPop(quad_vecs[i]);
1491: }
1493: /* compute local quad vec */
1494: MatISGetLocalMat(divudotp,&loc_divudotp);
1495: if (!transpose) {
1496: MatCreateVecs(loc_divudotp,&v,&p);
1497: } else {
1498: MatCreateVecs(loc_divudotp,&p,&v);
1499: }
1500: VecSet(p,1.);
1501: if (!transpose) {
1502: MatMultTranspose(loc_divudotp,p,v);
1503: } else {
1504: MatMult(loc_divudotp,p,v);
1505: }
1506: if (vl2l) {
1507: Mat lA;
1508: VecScatter sc;
1510: MatISGetLocalMat(A,&lA);
1511: MatCreateVecs(lA,&vins,NULL);
1512: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1513: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1514: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1515: VecScatterDestroy(&sc);
1516: } else {
1517: vins = v;
1518: }
1519: VecGetArrayRead(vins,&array);
1520: VecDestroy(&p);
1522: /* insert in global quadrature vecs */
1523: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1524: for (i=0;i<n_neigh;i++) {
1525: const PetscInt *idxs;
1526: PetscInt idx,nn,j;
1528: idxs = shared[i];
1529: nn = n_shared[i];
1530: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1531: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1532: idx = -(idx+1);
1533: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1534: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1535: }
1536: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1537: VecRestoreArrayRead(vins,&array);
1538: if (vl2l) {
1539: VecDestroy(&vins);
1540: }
1541: VecDestroy(&v);
1542: PetscFree2(gidxs,vals);
1544: /* assemble near null space */
1545: for (i=0;i<maxneighs;i++) {
1546: VecAssemblyBegin(quad_vecs[i]);
1547: }
1548: for (i=0;i<maxneighs;i++) {
1549: VecAssemblyEnd(quad_vecs[i]);
1550: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1551: VecLockReadPush(quad_vecs[i]);
1552: }
1553: VecDestroyVecs(maxneighs,&quad_vecs);
1554: return(0);
1555: }
1557: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1558: {
1559: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1563: if (primalv) {
1564: if (pcbddc->user_primal_vertices_local) {
1565: IS list[2], newp;
1567: list[0] = primalv;
1568: list[1] = pcbddc->user_primal_vertices_local;
1569: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1570: ISSortRemoveDups(newp);
1571: ISDestroy(&list[1]);
1572: pcbddc->user_primal_vertices_local = newp;
1573: } else {
1574: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1575: }
1576: }
1577: return(0);
1578: }
1580: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1581: {
1582: PetscInt f, *comp = (PetscInt *)ctx;
1585: for (f=0;f<Nf;f++) out[f] = X[*comp];
1586: return(0);
1587: }
1589: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1590: {
1592: Vec local,global;
1593: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1594: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1595: PetscBool monolithic = PETSC_FALSE;
1598: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1599: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1600: PetscOptionsEnd();
1601: /* need to convert from global to local topology information and remove references to information in global ordering */
1602: MatCreateVecs(pc->pmat,&global,NULL);
1603: MatCreateVecs(matis->A,&local,NULL);
1604: VecPinToCPU(global,PETSC_TRUE);
1605: VecPinToCPU(local,PETSC_TRUE);
1606: if (monolithic) { /* just get block size to properly compute vertices */
1607: if (pcbddc->vertex_size == 1) {
1608: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1609: }
1610: goto boundary;
1611: }
1613: if (pcbddc->user_provided_isfordofs) {
1614: if (pcbddc->n_ISForDofs) {
1615: PetscInt i;
1617: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1618: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1619: PetscInt bs;
1621: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1622: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1623: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1624: ISDestroy(&pcbddc->ISForDofs[i]);
1625: }
1626: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1627: pcbddc->n_ISForDofs = 0;
1628: PetscFree(pcbddc->ISForDofs);
1629: }
1630: } else {
1631: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1632: DM dm;
1634: MatGetDM(pc->pmat, &dm);
1635: if (!dm) {
1636: PCGetDM(pc, &dm);
1637: }
1638: if (dm) {
1639: IS *fields;
1640: PetscInt nf,i;
1642: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1643: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1644: for (i=0;i<nf;i++) {
1645: PetscInt bs;
1647: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1648: ISGetBlockSize(fields[i],&bs);
1649: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1650: ISDestroy(&fields[i]);
1651: }
1652: PetscFree(fields);
1653: pcbddc->n_ISForDofsLocal = nf;
1654: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1655: PetscContainer c;
1657: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1658: if (c) {
1659: MatISLocalFields lf;
1660: PetscContainerGetPointer(c,(void**)&lf);
1661: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1662: } else { /* fallback, create the default fields if bs > 1 */
1663: PetscInt i, n = matis->A->rmap->n;
1664: MatGetBlockSize(pc->pmat,&i);
1665: if (i > 1) {
1666: pcbddc->n_ISForDofsLocal = i;
1667: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1668: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1669: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1670: }
1671: }
1672: }
1673: }
1674: } else {
1675: PetscInt i;
1676: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1677: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1678: }
1679: }
1680: }
1682: boundary:
1683: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1684: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1685: } else if (pcbddc->DirichletBoundariesLocal) {
1686: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1687: }
1688: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1689: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1690: } else if (pcbddc->NeumannBoundariesLocal) {
1691: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1692: }
1693: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1694: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1695: }
1696: VecDestroy(&global);
1697: VecDestroy(&local);
1698: /* detect local disconnected subdomains if requested (use matis->A) */
1699: if (pcbddc->detect_disconnected) {
1700: IS primalv = NULL;
1701: PetscInt i;
1702: PetscBool filter = pcbddc->detect_disconnected_filter;
1704: for (i=0;i<pcbddc->n_local_subs;i++) {
1705: ISDestroy(&pcbddc->local_subs[i]);
1706: }
1707: PetscFree(pcbddc->local_subs);
1708: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1709: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1710: ISDestroy(&primalv);
1711: }
1712: /* early stage corner detection */
1713: {
1714: DM dm;
1716: MatGetDM(pc->pmat,&dm);
1717: if (!dm) {
1718: PCGetDM(pc,&dm);
1719: }
1720: if (dm) {
1721: PetscBool isda;
1723: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1724: if (isda) {
1725: ISLocalToGlobalMapping l2l;
1726: IS corners;
1727: Mat lA;
1728: PetscBool gl,lo;
1730: {
1731: Vec cvec;
1732: const PetscScalar *coords;
1733: PetscInt dof,n,cdim;
1734: PetscBool memc = PETSC_TRUE;
1736: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1737: DMGetCoordinates(dm,&cvec);
1738: VecGetLocalSize(cvec,&n);
1739: VecGetBlockSize(cvec,&cdim);
1740: n /= cdim;
1741: PetscFree(pcbddc->mat_graph->coords);
1742: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1743: VecGetArrayRead(cvec,&coords);
1744: #if defined(PETSC_USE_COMPLEX)
1745: memc = PETSC_FALSE;
1746: #endif
1747: if (dof != 1) memc = PETSC_FALSE;
1748: if (memc) {
1749: PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1750: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1751: PetscReal *bcoords = pcbddc->mat_graph->coords;
1752: PetscInt i, b, d;
1754: for (i=0;i<n;i++) {
1755: for (b=0;b<dof;b++) {
1756: for (d=0;d<cdim;d++) {
1757: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1758: }
1759: }
1760: }
1761: }
1762: VecRestoreArrayRead(cvec,&coords);
1763: pcbddc->mat_graph->cdim = cdim;
1764: pcbddc->mat_graph->cnloc = dof*n;
1765: pcbddc->mat_graph->cloc = PETSC_FALSE;
1766: }
1767: DMDAGetSubdomainCornersIS(dm,&corners);
1768: MatISGetLocalMat(pc->pmat,&lA);
1769: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1770: MatISRestoreLocalMat(pc->pmat,&lA);
1771: lo = (PetscBool)(l2l && corners);
1772: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1773: if (gl) { /* From PETSc's DMDA */
1774: const PetscInt *idx;
1775: PetscInt dof,bs,*idxout,n;
1777: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1778: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1779: ISGetLocalSize(corners,&n);
1780: ISGetIndices(corners,&idx);
1781: if (bs == dof) {
1782: PetscMalloc1(n,&idxout);
1783: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1784: } else { /* the original DMDA local-to-local map have been modified */
1785: PetscInt i,d;
1787: PetscMalloc1(dof*n,&idxout);
1788: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1789: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1791: bs = 1;
1792: n *= dof;
1793: }
1794: ISRestoreIndices(corners,&idx);
1795: DMDARestoreSubdomainCornersIS(dm,&corners);
1796: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1797: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1798: ISDestroy(&corners);
1799: pcbddc->corner_selected = PETSC_TRUE;
1800: pcbddc->corner_selection = PETSC_TRUE;
1801: }
1802: if (corners) {
1803: DMDARestoreSubdomainCornersIS(dm,&corners);
1804: }
1805: }
1806: }
1807: }
1808: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1809: DM dm;
1811: MatGetDM(pc->pmat,&dm);
1812: if (!dm) {
1813: PCGetDM(pc,&dm);
1814: }
1815: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1816: Vec vcoords;
1817: PetscSection section;
1818: PetscReal *coords;
1819: PetscInt d,cdim,nl,nf,**ctxs;
1820: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1822: DMGetCoordinateDim(dm,&cdim);
1823: DMGetLocalSection(dm,§ion);
1824: PetscSectionGetNumFields(section,&nf);
1825: DMCreateGlobalVector(dm,&vcoords);
1826: VecGetLocalSize(vcoords,&nl);
1827: PetscMalloc1(nl*cdim,&coords);
1828: PetscMalloc2(nf,&funcs,nf,&ctxs);
1829: PetscMalloc1(nf,&ctxs[0]);
1830: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1831: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1832: for (d=0;d<cdim;d++) {
1833: PetscInt i;
1834: const PetscScalar *v;
1836: for (i=0;i<nf;i++) ctxs[i][0] = d;
1837: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1838: VecGetArrayRead(vcoords,&v);
1839: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1840: VecRestoreArrayRead(vcoords,&v);
1841: }
1842: VecDestroy(&vcoords);
1843: PCSetCoordinates(pc,cdim,nl,coords);
1844: PetscFree(coords);
1845: PetscFree(ctxs[0]);
1846: PetscFree2(funcs,ctxs);
1847: }
1848: }
1849: return(0);
1850: }
1852: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1853: {
1854: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1855: PetscErrorCode ierr;
1856: IS nis;
1857: const PetscInt *idxs;
1858: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1859: PetscBool *ld;
1862: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1863: if (mop == MPI_LAND) {
1864: /* init rootdata with true */
1865: ld = (PetscBool*) matis->sf_rootdata;
1866: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1867: } else {
1868: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1869: }
1870: PetscArrayzero(matis->sf_leafdata,n);
1871: ISGetLocalSize(*is,&nd);
1872: ISGetIndices(*is,&idxs);
1873: ld = (PetscBool*) matis->sf_leafdata;
1874: for (i=0;i<nd;i++)
1875: if (-1 < idxs[i] && idxs[i] < n)
1876: ld[idxs[i]] = PETSC_TRUE;
1877: ISRestoreIndices(*is,&idxs);
1878: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1879: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1880: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1881: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1882: if (mop == MPI_LAND) {
1883: PetscMalloc1(nd,&nidxs);
1884: } else {
1885: PetscMalloc1(n,&nidxs);
1886: }
1887: for (i=0,nnd=0;i<n;i++)
1888: if (ld[i])
1889: nidxs[nnd++] = i;
1890: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1891: ISDestroy(is);
1892: *is = nis;
1893: return(0);
1894: }
1896: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1897: {
1898: PC_IS *pcis = (PC_IS*)(pc->data);
1899: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1900: PetscErrorCode ierr;
1903: if (!pcbddc->benign_have_null) {
1904: return(0);
1905: }
1906: if (pcbddc->ChangeOfBasisMatrix) {
1907: Vec swap;
1909: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1910: swap = pcbddc->work_change;
1911: pcbddc->work_change = r;
1912: r = swap;
1913: }
1914: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1915: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1916: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1917: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1918: VecSet(z,0.);
1919: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1920: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1921: if (pcbddc->ChangeOfBasisMatrix) {
1922: pcbddc->work_change = r;
1923: VecCopy(z,pcbddc->work_change);
1924: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1925: }
1926: return(0);
1927: }
1929: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1930: {
1931: PCBDDCBenignMatMult_ctx ctx;
1932: PetscErrorCode ierr;
1933: PetscBool apply_right,apply_left,reset_x;
1936: MatShellGetContext(A,&ctx);
1937: if (transpose) {
1938: apply_right = ctx->apply_left;
1939: apply_left = ctx->apply_right;
1940: } else {
1941: apply_right = ctx->apply_right;
1942: apply_left = ctx->apply_left;
1943: }
1944: reset_x = PETSC_FALSE;
1945: if (apply_right) {
1946: const PetscScalar *ax;
1947: PetscInt nl,i;
1949: VecGetLocalSize(x,&nl);
1950: VecGetArrayRead(x,&ax);
1951: PetscArraycpy(ctx->work,ax,nl);
1952: VecRestoreArrayRead(x,&ax);
1953: for (i=0;i<ctx->benign_n;i++) {
1954: PetscScalar sum,val;
1955: const PetscInt *idxs;
1956: PetscInt nz,j;
1957: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1958: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1959: sum = 0.;
1960: if (ctx->apply_p0) {
1961: val = ctx->work[idxs[nz-1]];
1962: for (j=0;j<nz-1;j++) {
1963: sum += ctx->work[idxs[j]];
1964: ctx->work[idxs[j]] += val;
1965: }
1966: } else {
1967: for (j=0;j<nz-1;j++) {
1968: sum += ctx->work[idxs[j]];
1969: }
1970: }
1971: ctx->work[idxs[nz-1]] -= sum;
1972: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1973: }
1974: VecPlaceArray(x,ctx->work);
1975: reset_x = PETSC_TRUE;
1976: }
1977: if (transpose) {
1978: MatMultTranspose(ctx->A,x,y);
1979: } else {
1980: MatMult(ctx->A,x,y);
1981: }
1982: if (reset_x) {
1983: VecResetArray(x);
1984: }
1985: if (apply_left) {
1986: PetscScalar *ay;
1987: PetscInt i;
1989: VecGetArray(y,&ay);
1990: for (i=0;i<ctx->benign_n;i++) {
1991: PetscScalar sum,val;
1992: const PetscInt *idxs;
1993: PetscInt nz,j;
1994: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1995: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1996: val = -ay[idxs[nz-1]];
1997: if (ctx->apply_p0) {
1998: sum = 0.;
1999: for (j=0;j<nz-1;j++) {
2000: sum += ay[idxs[j]];
2001: ay[idxs[j]] += val;
2002: }
2003: ay[idxs[nz-1]] += sum;
2004: } else {
2005: for (j=0;j<nz-1;j++) {
2006: ay[idxs[j]] += val;
2007: }
2008: ay[idxs[nz-1]] = 0.;
2009: }
2010: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2011: }
2012: VecRestoreArray(y,&ay);
2013: }
2014: return(0);
2015: }
2017: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2018: {
2022: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2023: return(0);
2024: }
2026: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2027: {
2031: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2032: return(0);
2033: }
2035: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2036: {
2037: PC_IS *pcis = (PC_IS*)pc->data;
2038: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2039: PCBDDCBenignMatMult_ctx ctx;
2040: PetscErrorCode ierr;
2043: if (!restore) {
2044: Mat A_IB,A_BI;
2045: PetscScalar *work;
2046: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2048: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2049: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2050: PetscMalloc1(pcis->n,&work);
2051: MatCreate(PETSC_COMM_SELF,&A_IB);
2052: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2053: MatSetType(A_IB,MATSHELL);
2054: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2055: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2056: PetscNew(&ctx);
2057: MatShellSetContext(A_IB,ctx);
2058: ctx->apply_left = PETSC_TRUE;
2059: ctx->apply_right = PETSC_FALSE;
2060: ctx->apply_p0 = PETSC_FALSE;
2061: ctx->benign_n = pcbddc->benign_n;
2062: if (reuse) {
2063: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2064: ctx->free = PETSC_FALSE;
2065: } else { /* TODO: could be optimized for successive solves */
2066: ISLocalToGlobalMapping N_to_D;
2067: PetscInt i;
2069: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2070: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2071: for (i=0;i<pcbddc->benign_n;i++) {
2072: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2073: }
2074: ISLocalToGlobalMappingDestroy(&N_to_D);
2075: ctx->free = PETSC_TRUE;
2076: }
2077: ctx->A = pcis->A_IB;
2078: ctx->work = work;
2079: MatSetUp(A_IB);
2080: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2081: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2082: pcis->A_IB = A_IB;
2084: /* A_BI as A_IB^T */
2085: MatCreateTranspose(A_IB,&A_BI);
2086: pcbddc->benign_original_mat = pcis->A_BI;
2087: pcis->A_BI = A_BI;
2088: } else {
2089: if (!pcbddc->benign_original_mat) {
2090: return(0);
2091: }
2092: MatShellGetContext(pcis->A_IB,&ctx);
2093: MatDestroy(&pcis->A_IB);
2094: pcis->A_IB = ctx->A;
2095: ctx->A = NULL;
2096: MatDestroy(&pcis->A_BI);
2097: pcis->A_BI = pcbddc->benign_original_mat;
2098: pcbddc->benign_original_mat = NULL;
2099: if (ctx->free) {
2100: PetscInt i;
2101: for (i=0;i<ctx->benign_n;i++) {
2102: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2103: }
2104: PetscFree(ctx->benign_zerodiag_subs);
2105: }
2106: PetscFree(ctx->work);
2107: PetscFree(ctx);
2108: }
2109: return(0);
2110: }
2112: /* used just in bddc debug mode */
2113: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2114: {
2115: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2116: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2117: Mat An;
2121: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2122: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2123: if (is1) {
2124: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2125: MatDestroy(&An);
2126: } else {
2127: *B = An;
2128: }
2129: return(0);
2130: }
2132: /* TODO: add reuse flag */
2133: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2134: {
2135: Mat Bt;
2136: PetscScalar *a,*bdata;
2137: const PetscInt *ii,*ij;
2138: PetscInt m,n,i,nnz,*bii,*bij;
2139: PetscBool flg_row;
2143: MatGetSize(A,&n,&m);
2144: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2145: MatSeqAIJGetArray(A,&a);
2146: nnz = n;
2147: for (i=0;i<ii[n];i++) {
2148: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2149: }
2150: PetscMalloc1(n+1,&bii);
2151: PetscMalloc1(nnz,&bij);
2152: PetscMalloc1(nnz,&bdata);
2153: nnz = 0;
2154: bii[0] = 0;
2155: for (i=0;i<n;i++) {
2156: PetscInt j;
2157: for (j=ii[i];j<ii[i+1];j++) {
2158: PetscScalar entry = a[j];
2159: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2160: bij[nnz] = ij[j];
2161: bdata[nnz] = entry;
2162: nnz++;
2163: }
2164: }
2165: bii[i+1] = nnz;
2166: }
2167: MatSeqAIJRestoreArray(A,&a);
2168: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2169: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2170: {
2171: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2172: b->free_a = PETSC_TRUE;
2173: b->free_ij = PETSC_TRUE;
2174: }
2175: if (*B == A) {
2176: MatDestroy(&A);
2177: }
2178: *B = Bt;
2179: return(0);
2180: }
2182: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2183: {
2184: Mat B = NULL;
2185: DM dm;
2186: IS is_dummy,*cc_n;
2187: ISLocalToGlobalMapping l2gmap_dummy;
2188: PCBDDCGraph graph;
2189: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2190: PetscInt i,n;
2191: PetscInt *xadj,*adjncy;
2192: PetscBool isplex = PETSC_FALSE;
2193: PetscErrorCode ierr;
2196: if (ncc) *ncc = 0;
2197: if (cc) *cc = NULL;
2198: if (primalv) *primalv = NULL;
2199: PCBDDCGraphCreate(&graph);
2200: MatGetDM(pc->pmat,&dm);
2201: if (!dm) {
2202: PCGetDM(pc,&dm);
2203: }
2204: if (dm) {
2205: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2206: }
2207: if (filter) isplex = PETSC_FALSE;
2209: if (isplex) { /* this code has been modified from plexpartition.c */
2210: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2211: PetscInt *adj = NULL;
2212: IS cellNumbering;
2213: const PetscInt *cellNum;
2214: PetscBool useCone, useClosure;
2215: PetscSection section;
2216: PetscSegBuffer adjBuffer;
2217: PetscSF sfPoint;
2221: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2222: DMGetPointSF(dm, &sfPoint);
2223: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2224: /* Build adjacency graph via a section/segbuffer */
2225: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2226: PetscSectionSetChart(section, pStart, pEnd);
2227: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2228: /* Always use FVM adjacency to create partitioner graph */
2229: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2230: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2231: DMPlexGetCellNumbering(dm, &cellNumbering);
2232: ISGetIndices(cellNumbering, &cellNum);
2233: for (n = 0, p = pStart; p < pEnd; p++) {
2234: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2235: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2236: adjSize = PETSC_DETERMINE;
2237: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2238: for (a = 0; a < adjSize; ++a) {
2239: const PetscInt point = adj[a];
2240: if (pStart <= point && point < pEnd) {
2241: PetscInt *PETSC_RESTRICT pBuf;
2242: PetscSectionAddDof(section, p, 1);
2243: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2244: *pBuf = point;
2245: }
2246: }
2247: n++;
2248: }
2249: DMSetBasicAdjacency(dm, useCone, useClosure);
2250: /* Derive CSR graph from section/segbuffer */
2251: PetscSectionSetUp(section);
2252: PetscSectionGetStorageSize(section, &size);
2253: PetscMalloc1(n+1, &xadj);
2254: for (idx = 0, p = pStart; p < pEnd; p++) {
2255: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2256: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2257: }
2258: xadj[n] = size;
2259: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2260: /* Clean up */
2261: PetscSegBufferDestroy(&adjBuffer);
2262: PetscSectionDestroy(§ion);
2263: PetscFree(adj);
2264: graph->xadj = xadj;
2265: graph->adjncy = adjncy;
2266: } else {
2267: Mat A;
2268: PetscBool isseqaij, flg_row;
2270: MatISGetLocalMat(pc->pmat,&A);
2271: if (!A->rmap->N || !A->cmap->N) {
2272: PCBDDCGraphDestroy(&graph);
2273: return(0);
2274: }
2275: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2276: if (!isseqaij && filter) {
2277: PetscBool isseqdense;
2279: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2280: if (!isseqdense) {
2281: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2282: } else { /* TODO: rectangular case and LDA */
2283: PetscScalar *array;
2284: PetscReal chop=1.e-6;
2286: MatDuplicate(A,MAT_COPY_VALUES,&B);
2287: MatDenseGetArray(B,&array);
2288: MatGetSize(B,&n,NULL);
2289: for (i=0;i<n;i++) {
2290: PetscInt j;
2291: for (j=i+1;j<n;j++) {
2292: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2293: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2294: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2295: }
2296: }
2297: MatDenseRestoreArray(B,&array);
2298: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2299: }
2300: } else {
2301: PetscObjectReference((PetscObject)A);
2302: B = A;
2303: }
2304: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2306: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2307: if (filter) {
2308: PetscScalar *data;
2309: PetscInt j,cum;
2311: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2312: MatSeqAIJGetArray(B,&data);
2313: cum = 0;
2314: for (i=0;i<n;i++) {
2315: PetscInt t;
2317: for (j=xadj[i];j<xadj[i+1];j++) {
2318: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2319: continue;
2320: }
2321: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2322: }
2323: t = xadj_filtered[i];
2324: xadj_filtered[i] = cum;
2325: cum += t;
2326: }
2327: MatSeqAIJRestoreArray(B,&data);
2328: graph->xadj = xadj_filtered;
2329: graph->adjncy = adjncy_filtered;
2330: } else {
2331: graph->xadj = xadj;
2332: graph->adjncy = adjncy;
2333: }
2334: }
2335: /* compute local connected components using PCBDDCGraph */
2336: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2337: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2338: ISDestroy(&is_dummy);
2339: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2340: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2341: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2342: PCBDDCGraphComputeConnectedComponents(graph);
2344: /* partial clean up */
2345: PetscFree2(xadj_filtered,adjncy_filtered);
2346: if (B) {
2347: PetscBool flg_row;
2348: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2349: MatDestroy(&B);
2350: }
2351: if (isplex) {
2352: PetscFree(xadj);
2353: PetscFree(adjncy);
2354: }
2356: /* get back data */
2357: if (isplex) {
2358: if (ncc) *ncc = graph->ncc;
2359: if (cc || primalv) {
2360: Mat A;
2361: PetscBT btv,btvt;
2362: PetscSection subSection;
2363: PetscInt *ids,cum,cump,*cids,*pids;
2365: DMPlexGetSubdomainSection(dm,&subSection);
2366: MatISGetLocalMat(pc->pmat,&A);
2367: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2368: PetscBTCreate(A->rmap->n,&btv);
2369: PetscBTCreate(A->rmap->n,&btvt);
2371: cids[0] = 0;
2372: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2373: PetscInt j;
2375: PetscBTMemzero(A->rmap->n,btvt);
2376: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2377: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2379: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2380: for (k = 0; k < 2*size; k += 2) {
2381: PetscInt s, pp, p = closure[k], off, dof, cdof;
2383: PetscSectionGetConstraintDof(subSection,p,&cdof);
2384: PetscSectionGetOffset(subSection,p,&off);
2385: PetscSectionGetDof(subSection,p,&dof);
2386: for (s = 0; s < dof-cdof; s++) {
2387: if (PetscBTLookupSet(btvt,off+s)) continue;
2388: if (!PetscBTLookup(btv,off+s)) {
2389: ids[cum++] = off+s;
2390: } else { /* cross-vertex */
2391: pids[cump++] = off+s;
2392: }
2393: }
2394: DMPlexGetTreeParent(dm,p,&pp,NULL);
2395: if (pp != p) {
2396: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2397: PetscSectionGetOffset(subSection,pp,&off);
2398: PetscSectionGetDof(subSection,pp,&dof);
2399: for (s = 0; s < dof-cdof; s++) {
2400: if (PetscBTLookupSet(btvt,off+s)) continue;
2401: if (!PetscBTLookup(btv,off+s)) {
2402: ids[cum++] = off+s;
2403: } else { /* cross-vertex */
2404: pids[cump++] = off+s;
2405: }
2406: }
2407: }
2408: }
2409: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2410: }
2411: cids[i+1] = cum;
2412: /* mark dofs as already assigned */
2413: for (j = cids[i]; j < cids[i+1]; j++) {
2414: PetscBTSet(btv,ids[j]);
2415: }
2416: }
2417: if (cc) {
2418: PetscMalloc1(graph->ncc,&cc_n);
2419: for (i = 0; i < graph->ncc; i++) {
2420: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2421: }
2422: *cc = cc_n;
2423: }
2424: if (primalv) {
2425: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2426: }
2427: PetscFree3(ids,cids,pids);
2428: PetscBTDestroy(&btv);
2429: PetscBTDestroy(&btvt);
2430: }
2431: } else {
2432: if (ncc) *ncc = graph->ncc;
2433: if (cc) {
2434: PetscMalloc1(graph->ncc,&cc_n);
2435: for (i=0;i<graph->ncc;i++) {
2436: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2437: }
2438: *cc = cc_n;
2439: }
2440: }
2441: /* clean up graph */
2442: graph->xadj = 0;
2443: graph->adjncy = 0;
2444: PCBDDCGraphDestroy(&graph);
2445: return(0);
2446: }
2448: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2449: {
2450: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2451: PC_IS* pcis = (PC_IS*)(pc->data);
2452: IS dirIS = NULL;
2453: PetscInt i;
2457: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2458: if (zerodiag) {
2459: Mat A;
2460: Vec vec3_N;
2461: PetscScalar *vals;
2462: const PetscInt *idxs;
2463: PetscInt nz,*count;
2465: /* p0 */
2466: VecSet(pcis->vec1_N,0.);
2467: PetscMalloc1(pcis->n,&vals);
2468: ISGetLocalSize(zerodiag,&nz);
2469: ISGetIndices(zerodiag,&idxs);
2470: for (i=0;i<nz;i++) vals[i] = 1.;
2471: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2472: VecAssemblyBegin(pcis->vec1_N);
2473: VecAssemblyEnd(pcis->vec1_N);
2474: /* v_I */
2475: VecSetRandom(pcis->vec2_N,NULL);
2476: for (i=0;i<nz;i++) vals[i] = 0.;
2477: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2478: ISRestoreIndices(zerodiag,&idxs);
2479: ISGetIndices(pcis->is_B_local,&idxs);
2480: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2481: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2482: ISRestoreIndices(pcis->is_B_local,&idxs);
2483: if (dirIS) {
2484: PetscInt n;
2486: ISGetLocalSize(dirIS,&n);
2487: ISGetIndices(dirIS,&idxs);
2488: for (i=0;i<n;i++) vals[i] = 0.;
2489: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2490: ISRestoreIndices(dirIS,&idxs);
2491: }
2492: VecAssemblyBegin(pcis->vec2_N);
2493: VecAssemblyEnd(pcis->vec2_N);
2494: VecDuplicate(pcis->vec1_N,&vec3_N);
2495: VecSet(vec3_N,0.);
2496: MatISGetLocalMat(pc->pmat,&A);
2497: MatMult(A,pcis->vec1_N,vec3_N);
2498: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2499: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2500: PetscFree(vals);
2501: VecDestroy(&vec3_N);
2503: /* there should not be any pressure dofs lying on the interface */
2504: PetscCalloc1(pcis->n,&count);
2505: ISGetIndices(pcis->is_B_local,&idxs);
2506: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2507: ISRestoreIndices(pcis->is_B_local,&idxs);
2508: ISGetIndices(zerodiag,&idxs);
2509: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2510: ISRestoreIndices(zerodiag,&idxs);
2511: PetscFree(count);
2512: }
2513: ISDestroy(&dirIS);
2515: /* check PCBDDCBenignGetOrSetP0 */
2516: VecSetRandom(pcis->vec1_global,NULL);
2517: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2518: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2519: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2520: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2521: for (i=0;i<pcbddc->benign_n;i++) {
2522: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2523: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2524: }
2525: return(0);
2526: }
2528: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2529: {
2530: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2531: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2532: PetscInt nz,n,benign_n,bsp = 1;
2533: PetscInt *interior_dofs,n_interior_dofs,nneu;
2534: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2538: if (reuse) goto project_b0;
2539: PetscSFDestroy(&pcbddc->benign_sf);
2540: MatDestroy(&pcbddc->benign_B0);
2541: for (n=0;n<pcbddc->benign_n;n++) {
2542: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2543: }
2544: PetscFree(pcbddc->benign_zerodiag_subs);
2545: has_null_pressures = PETSC_TRUE;
2546: have_null = PETSC_TRUE;
2547: /* if local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2548: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2549: Checks if all the pressure dofs in each subdomain have a zero diagonal
2550: If not, a change of basis on pressures is not needed
2551: since the local Schur complements are already SPD
2552: */
2553: if (pcbddc->n_ISForDofsLocal) {
2554: IS iP = NULL;
2555: PetscInt p,*pp;
2556: PetscBool flg;
2558: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2559: n = pcbddc->n_ISForDofsLocal;
2560: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2561: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2562: PetscOptionsEnd();
2563: if (!flg) {
2564: n = 1;
2565: pp[0] = pcbddc->n_ISForDofsLocal-1;
2566: }
2568: bsp = 0;
2569: for (p=0;p<n;p++) {
2570: PetscInt bs;
2572: if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2573: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2574: bsp += bs;
2575: }
2576: PetscMalloc1(bsp,&bzerodiag);
2577: bsp = 0;
2578: for (p=0;p<n;p++) {
2579: const PetscInt *idxs;
2580: PetscInt b,bs,npl,*bidxs;
2582: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2583: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2584: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2585: PetscMalloc1(npl/bs,&bidxs);
2586: for (b=0;b<bs;b++) {
2587: PetscInt i;
2589: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2590: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2591: bsp++;
2592: }
2593: PetscFree(bidxs);
2594: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2595: }
2596: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2598: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2599: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2600: if (iP) {
2601: IS newpressures;
2603: ISDifference(pressures,iP,&newpressures);
2604: ISDestroy(&pressures);
2605: pressures = newpressures;
2606: }
2607: ISSorted(pressures,&sorted);
2608: if (!sorted) {
2609: ISSort(pressures);
2610: }
2611: PetscFree(pp);
2612: }
2614: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2615: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2616: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2617: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2618: ISSorted(zerodiag,&sorted);
2619: if (!sorted) {
2620: ISSort(zerodiag);
2621: }
2622: PetscObjectReference((PetscObject)zerodiag);
2623: zerodiag_save = zerodiag;
2624: ISGetLocalSize(zerodiag,&nz);
2625: if (!nz) {
2626: if (n) have_null = PETSC_FALSE;
2627: has_null_pressures = PETSC_FALSE;
2628: ISDestroy(&zerodiag);
2629: }
2630: recompute_zerodiag = PETSC_FALSE;
2632: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2633: zerodiag_subs = NULL;
2634: benign_n = 0;
2635: n_interior_dofs = 0;
2636: interior_dofs = NULL;
2637: nneu = 0;
2638: if (pcbddc->NeumannBoundariesLocal) {
2639: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2640: }
2641: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2642: if (checkb) { /* need to compute interior nodes */
2643: PetscInt n,i,j;
2644: PetscInt n_neigh,*neigh,*n_shared,**shared;
2645: PetscInt *iwork;
2647: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2648: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2649: PetscCalloc1(n,&iwork);
2650: PetscMalloc1(n,&interior_dofs);
2651: for (i=1;i<n_neigh;i++)
2652: for (j=0;j<n_shared[i];j++)
2653: iwork[shared[i][j]] += 1;
2654: for (i=0;i<n;i++)
2655: if (!iwork[i])
2656: interior_dofs[n_interior_dofs++] = i;
2657: PetscFree(iwork);
2658: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2659: }
2660: if (has_null_pressures) {
2661: IS *subs;
2662: PetscInt nsubs,i,j,nl;
2663: const PetscInt *idxs;
2664: PetscScalar *array;
2665: Vec *work;
2666: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2668: subs = pcbddc->local_subs;
2669: nsubs = pcbddc->n_local_subs;
2670: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2671: if (checkb) {
2672: VecDuplicateVecs(matis->y,2,&work);
2673: ISGetLocalSize(zerodiag,&nl);
2674: ISGetIndices(zerodiag,&idxs);
2675: /* work[0] = 1_p */
2676: VecSet(work[0],0.);
2677: VecGetArray(work[0],&array);
2678: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2679: VecRestoreArray(work[0],&array);
2680: /* work[1] = 1_v */
2681: VecSet(work[1],1.);
2682: VecGetArray(work[1],&array);
2683: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2684: VecRestoreArray(work[1],&array);
2685: ISRestoreIndices(zerodiag,&idxs);
2686: }
2688: if (nsubs > 1 || bsp > 1) {
2689: IS *is;
2690: PetscInt b,totb;
2692: totb = bsp;
2693: is = bsp > 1 ? bzerodiag : &zerodiag;
2694: nsubs = PetscMax(nsubs,1);
2695: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2696: for (b=0;b<totb;b++) {
2697: for (i=0;i<nsubs;i++) {
2698: ISLocalToGlobalMapping l2g;
2699: IS t_zerodiag_subs;
2700: PetscInt nl;
2702: if (subs) {
2703: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2704: } else {
2705: IS tis;
2707: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2708: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2709: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2710: ISDestroy(&tis);
2711: }
2712: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2713: ISGetLocalSize(t_zerodiag_subs,&nl);
2714: if (nl) {
2715: PetscBool valid = PETSC_TRUE;
2717: if (checkb) {
2718: VecSet(matis->x,0);
2719: ISGetLocalSize(subs[i],&nl);
2720: ISGetIndices(subs[i],&idxs);
2721: VecGetArray(matis->x,&array);
2722: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2723: VecRestoreArray(matis->x,&array);
2724: ISRestoreIndices(subs[i],&idxs);
2725: VecPointwiseMult(matis->x,work[0],matis->x);
2726: MatMult(matis->A,matis->x,matis->y);
2727: VecPointwiseMult(matis->y,work[1],matis->y);
2728: VecGetArray(matis->y,&array);
2729: for (j=0;j<n_interior_dofs;j++) {
2730: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2731: valid = PETSC_FALSE;
2732: break;
2733: }
2734: }
2735: VecRestoreArray(matis->y,&array);
2736: }
2737: if (valid && nneu) {
2738: const PetscInt *idxs;
2739: PetscInt nzb;
2741: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2742: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2743: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2744: if (nzb) valid = PETSC_FALSE;
2745: }
2746: if (valid && pressures) {
2747: IS t_pressure_subs,tmp;
2748: PetscInt i1,i2;
2750: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2751: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2752: ISGetLocalSize(tmp,&i1);
2753: ISGetLocalSize(t_zerodiag_subs,&i2);
2754: if (i2 != i1) valid = PETSC_FALSE;
2755: ISDestroy(&t_pressure_subs);
2756: ISDestroy(&tmp);
2757: }
2758: if (valid) {
2759: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2760: benign_n++;
2761: } else recompute_zerodiag = PETSC_TRUE;
2762: }
2763: ISDestroy(&t_zerodiag_subs);
2764: ISLocalToGlobalMappingDestroy(&l2g);
2765: }
2766: }
2767: } else { /* there's just one subdomain (or zero if they have not been detected */
2768: PetscBool valid = PETSC_TRUE;
2770: if (nneu) valid = PETSC_FALSE;
2771: if (valid && pressures) {
2772: ISEqual(pressures,zerodiag,&valid);
2773: }
2774: if (valid && checkb) {
2775: MatMult(matis->A,work[0],matis->x);
2776: VecPointwiseMult(matis->x,work[1],matis->x);
2777: VecGetArray(matis->x,&array);
2778: for (j=0;j<n_interior_dofs;j++) {
2779: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2780: valid = PETSC_FALSE;
2781: break;
2782: }
2783: }
2784: VecRestoreArray(matis->x,&array);
2785: }
2786: if (valid) {
2787: benign_n = 1;
2788: PetscMalloc1(benign_n,&zerodiag_subs);
2789: PetscObjectReference((PetscObject)zerodiag);
2790: zerodiag_subs[0] = zerodiag;
2791: }
2792: }
2793: if (checkb) {
2794: VecDestroyVecs(2,&work);
2795: }
2796: }
2797: PetscFree(interior_dofs);
2799: if (!benign_n) {
2800: PetscInt n;
2802: ISDestroy(&zerodiag);
2803: recompute_zerodiag = PETSC_FALSE;
2804: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2805: if (n) have_null = PETSC_FALSE;
2806: }
2808: /* final check for null pressures */
2809: if (zerodiag && pressures) {
2810: ISEqual(pressures,zerodiag,&have_null);
2811: }
2813: if (recompute_zerodiag) {
2814: ISDestroy(&zerodiag);
2815: if (benign_n == 1) {
2816: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2817: zerodiag = zerodiag_subs[0];
2818: } else {
2819: PetscInt i,nzn,*new_idxs;
2821: nzn = 0;
2822: for (i=0;i<benign_n;i++) {
2823: PetscInt ns;
2824: ISGetLocalSize(zerodiag_subs[i],&ns);
2825: nzn += ns;
2826: }
2827: PetscMalloc1(nzn,&new_idxs);
2828: nzn = 0;
2829: for (i=0;i<benign_n;i++) {
2830: PetscInt ns,*idxs;
2831: ISGetLocalSize(zerodiag_subs[i],&ns);
2832: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2833: PetscArraycpy(new_idxs+nzn,idxs,ns);
2834: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2835: nzn += ns;
2836: }
2837: PetscSortInt(nzn,new_idxs);
2838: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2839: }
2840: have_null = PETSC_FALSE;
2841: }
2843: /* determines if the coarse solver will be singular or not */
2844: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2846: /* Prepare matrix to compute no-net-flux */
2847: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2848: Mat A,loc_divudotp;
2849: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2850: IS row,col,isused = NULL;
2851: PetscInt M,N,n,st,n_isused;
2853: if (pressures) {
2854: isused = pressures;
2855: } else {
2856: isused = zerodiag_save;
2857: }
2858: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2859: MatISGetLocalMat(pc->pmat,&A);
2860: MatGetLocalSize(A,&n,NULL);
2861: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2862: n_isused = 0;
2863: if (isused) {
2864: ISGetLocalSize(isused,&n_isused);
2865: }
2866: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2867: st = st-n_isused;
2868: if (n) {
2869: const PetscInt *gidxs;
2871: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2872: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2873: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2874: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2875: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2876: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2877: } else {
2878: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2879: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2880: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2881: }
2882: MatGetSize(pc->pmat,NULL,&N);
2883: ISGetSize(row,&M);
2884: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2885: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2886: ISDestroy(&row);
2887: ISDestroy(&col);
2888: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2889: MatSetType(pcbddc->divudotp,MATIS);
2890: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2891: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2892: ISLocalToGlobalMappingDestroy(&rl2g);
2893: ISLocalToGlobalMappingDestroy(&cl2g);
2894: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2895: MatDestroy(&loc_divudotp);
2896: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2897: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2898: }
2899: ISDestroy(&zerodiag_save);
2900: ISDestroy(&pressures);
2901: if (bzerodiag) {
2902: PetscInt i;
2904: for (i=0;i<bsp;i++) {
2905: ISDestroy(&bzerodiag[i]);
2906: }
2907: PetscFree(bzerodiag);
2908: }
2909: pcbddc->benign_n = benign_n;
2910: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2912: /* determines if the problem has subdomains with 0 pressure block */
2913: have_null = (PetscBool)(!!pcbddc->benign_n);
2914: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2916: project_b0:
2917: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2918: /* change of basis and p0 dofs */
2919: if (pcbddc->benign_n) {
2920: PetscInt i,s,*nnz;
2922: /* local change of basis for pressures */
2923: MatDestroy(&pcbddc->benign_change);
2924: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2925: MatSetType(pcbddc->benign_change,MATAIJ);
2926: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2927: PetscMalloc1(n,&nnz);
2928: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2929: for (i=0;i<pcbddc->benign_n;i++) {
2930: const PetscInt *idxs;
2931: PetscInt nzs,j;
2933: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2934: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2935: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2936: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2937: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2938: }
2939: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2940: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2941: PetscFree(nnz);
2942: /* set identity by default */
2943: for (i=0;i<n;i++) {
2944: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2945: }
2946: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2947: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2948: /* set change on pressures */
2949: for (s=0;s<pcbddc->benign_n;s++) {
2950: PetscScalar *array;
2951: const PetscInt *idxs;
2952: PetscInt nzs;
2954: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2955: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2956: for (i=0;i<nzs-1;i++) {
2957: PetscScalar vals[2];
2958: PetscInt cols[2];
2960: cols[0] = idxs[i];
2961: cols[1] = idxs[nzs-1];
2962: vals[0] = 1.;
2963: vals[1] = 1.;
2964: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2965: }
2966: PetscMalloc1(nzs,&array);
2967: for (i=0;i<nzs-1;i++) array[i] = -1.;
2968: array[nzs-1] = 1.;
2969: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2970: /* store local idxs for p0 */
2971: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2972: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2973: PetscFree(array);
2974: }
2975: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2976: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2978: /* project if needed */
2979: if (pcbddc->benign_change_explicit) {
2980: Mat M;
2982: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2983: MatDestroy(&pcbddc->local_mat);
2984: MatSeqAIJCompress(M,&pcbddc->local_mat);
2985: MatDestroy(&M);
2986: }
2987: /* store global idxs for p0 */
2988: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2989: }
2990: *zerodiaglocal = zerodiag;
2991: return(0);
2992: }
2994: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2995: {
2996: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2997: PetscScalar *array;
3001: if (!pcbddc->benign_sf) {
3002: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3003: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3004: }
3005: if (get) {
3006: VecGetArrayRead(v,(const PetscScalar**)&array);
3007: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3008: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3009: VecRestoreArrayRead(v,(const PetscScalar**)&array);
3010: } else {
3011: VecGetArray(v,&array);
3012: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3013: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3014: VecRestoreArray(v,&array);
3015: }
3016: return(0);
3017: }
3019: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3020: {
3021: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3025: /* TODO: add error checking
3026: - avoid nested pop (or push) calls.
3027: - cannot push before pop.
3028: - cannot call this if pcbddc->local_mat is NULL
3029: */
3030: if (!pcbddc->benign_n) {
3031: return(0);
3032: }
3033: if (pop) {
3034: if (pcbddc->benign_change_explicit) {
3035: IS is_p0;
3036: MatReuse reuse;
3038: /* extract B_0 */
3039: reuse = MAT_INITIAL_MATRIX;
3040: if (pcbddc->benign_B0) {
3041: reuse = MAT_REUSE_MATRIX;
3042: }
3043: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3044: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3045: /* remove rows and cols from local problem */
3046: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3047: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3048: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3049: ISDestroy(&is_p0);
3050: } else {
3051: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3052: PetscScalar *vals;
3053: PetscInt i,n,*idxs_ins;
3055: VecGetLocalSize(matis->y,&n);
3056: PetscMalloc2(n,&idxs_ins,n,&vals);
3057: if (!pcbddc->benign_B0) {
3058: PetscInt *nnz;
3059: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3060: MatSetType(pcbddc->benign_B0,MATAIJ);
3061: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3062: PetscMalloc1(pcbddc->benign_n,&nnz);
3063: for (i=0;i<pcbddc->benign_n;i++) {
3064: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3065: nnz[i] = n - nnz[i];
3066: }
3067: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3068: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3069: PetscFree(nnz);
3070: }
3072: for (i=0;i<pcbddc->benign_n;i++) {
3073: PetscScalar *array;
3074: PetscInt *idxs,j,nz,cum;
3076: VecSet(matis->x,0.);
3077: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3078: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3079: for (j=0;j<nz;j++) vals[j] = 1.;
3080: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3081: VecAssemblyBegin(matis->x);
3082: VecAssemblyEnd(matis->x);
3083: VecSet(matis->y,0.);
3084: MatMult(matis->A,matis->x,matis->y);
3085: VecGetArray(matis->y,&array);
3086: cum = 0;
3087: for (j=0;j<n;j++) {
3088: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3089: vals[cum] = array[j];
3090: idxs_ins[cum] = j;
3091: cum++;
3092: }
3093: }
3094: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3095: VecRestoreArray(matis->y,&array);
3096: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3097: }
3098: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3099: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3100: PetscFree2(idxs_ins,vals);
3101: }
3102: } else { /* push */
3103: if (pcbddc->benign_change_explicit) {
3104: PetscInt i;
3106: for (i=0;i<pcbddc->benign_n;i++) {
3107: PetscScalar *B0_vals;
3108: PetscInt *B0_cols,B0_ncol;
3110: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3111: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3112: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3113: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3114: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3115: }
3116: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3117: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3118: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3119: }
3120: return(0);
3121: }
3123: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3124: {
3125: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3126: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3127: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3128: PetscBLASInt *B_iwork,*B_ifail;
3129: PetscScalar *work,lwork;
3130: PetscScalar *St,*S,*eigv;
3131: PetscScalar *Sarray,*Starray;
3132: PetscReal *eigs,thresh,lthresh,uthresh;
3133: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3134: PetscBool allocated_S_St;
3135: #if defined(PETSC_USE_COMPLEX)
3136: PetscReal *rwork;
3137: #endif
3138: PetscErrorCode ierr;
3141: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3142: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3143: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3144: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3146: if (pcbddc->dbg_flag) {
3147: PetscViewerFlush(pcbddc->dbg_viewer);
3148: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3149: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3150: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3151: }
3153: if (pcbddc->dbg_flag) {
3154: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3155: }
3157: /* max size of subsets */
3158: mss = 0;
3159: for (i=0;i<sub_schurs->n_subs;i++) {
3160: PetscInt subset_size;
3162: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3163: mss = PetscMax(mss,subset_size);
3164: }
3166: /* min/max and threshold */
3167: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3168: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3169: nmax = PetscMax(nmin,nmax);
3170: allocated_S_St = PETSC_FALSE;
3171: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3172: allocated_S_St = PETSC_TRUE;
3173: }
3175: /* allocate lapack workspace */
3176: cum = cum2 = 0;
3177: maxneigs = 0;
3178: for (i=0;i<sub_schurs->n_subs;i++) {
3179: PetscInt n,subset_size;
3181: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3182: n = PetscMin(subset_size,nmax);
3183: cum += subset_size;
3184: cum2 += subset_size*n;
3185: maxneigs = PetscMax(maxneigs,n);
3186: }
3187: lwork = 0;
3188: if (mss) {
3189: if (sub_schurs->is_symmetric) {
3190: PetscScalar sdummy = 0.;
3191: PetscBLASInt B_itype = 1;
3192: PetscBLASInt B_N = mss, idummy = 0;
3193: PetscReal rdummy = 0.,zero = 0.0;
3194: PetscReal eps = 0.0; /* dlamch? */
3196: B_lwork = -1;
3197: /* some implementations may complain about NULL pointers, even if we are querying */
3198: S = &sdummy;
3199: St = &sdummy;
3200: eigs = &rdummy;
3201: eigv = &sdummy;
3202: B_iwork = &idummy;
3203: B_ifail = &idummy;
3204: #if defined(PETSC_USE_COMPLEX)
3205: rwork = &rdummy;
3206: #endif
3207: thresh = 1.0;
3208: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3209: #if defined(PETSC_USE_COMPLEX)
3210: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3211: #else
3212: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3213: #endif
3214: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3215: PetscFPTrapPop();
3216: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3217: }
3219: nv = 0;
3220: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3221: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3222: }
3223: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3224: if (allocated_S_St) {
3225: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3226: }
3227: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3228: #if defined(PETSC_USE_COMPLEX)
3229: PetscMalloc1(7*mss,&rwork);
3230: #endif
3231: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3232: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3233: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3234: nv+cum,&pcbddc->adaptive_constraints_idxs,
3235: nv+cum2,&pcbddc->adaptive_constraints_data);
3236: PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);
3238: maxneigs = 0;
3239: cum = cumarray = 0;
3240: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3241: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3242: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3243: const PetscInt *idxs;
3245: ISGetIndices(sub_schurs->is_vertices,&idxs);
3246: for (cum=0;cum<nv;cum++) {
3247: pcbddc->adaptive_constraints_n[cum] = 1;
3248: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3249: pcbddc->adaptive_constraints_data[cum] = 1.0;
3250: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3251: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3252: }
3253: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3254: }
3256: if (mss) { /* multilevel */
3257: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3258: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3259: }
3261: lthresh = pcbddc->adaptive_threshold[0];
3262: uthresh = pcbddc->adaptive_threshold[1];
3263: for (i=0;i<sub_schurs->n_subs;i++) {
3264: const PetscInt *idxs;
3265: PetscReal upper,lower;
3266: PetscInt j,subset_size,eigs_start = 0;
3267: PetscBLASInt B_N;
3268: PetscBool same_data = PETSC_FALSE;
3269: PetscBool scal = PETSC_FALSE;
3271: if (pcbddc->use_deluxe_scaling) {
3272: upper = PETSC_MAX_REAL;
3273: lower = uthresh;
3274: } else {
3275: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3276: upper = 1./uthresh;
3277: lower = 0.;
3278: }
3279: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3280: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3281: PetscBLASIntCast(subset_size,&B_N);
3282: /* this is experimental: we assume the dofs have been properly grouped to have
3283: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3284: if (!sub_schurs->is_posdef) {
3285: Mat T;
3287: for (j=0;j<subset_size;j++) {
3288: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3289: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3290: MatScale(T,-1.0);
3291: MatDestroy(&T);
3292: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3293: MatScale(T,-1.0);
3294: MatDestroy(&T);
3295: if (sub_schurs->change_primal_sub) {
3296: PetscInt nz,k;
3297: const PetscInt *idxs;
3299: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3300: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3301: for (k=0;k<nz;k++) {
3302: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3303: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3304: }
3305: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3306: }
3307: scal = PETSC_TRUE;
3308: break;
3309: }
3310: }
3311: }
3313: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3314: if (sub_schurs->is_symmetric) {
3315: PetscInt j,k;
3316: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3317: PetscArrayzero(S,subset_size*subset_size);
3318: PetscArrayzero(St,subset_size*subset_size);
3319: }
3320: for (j=0;j<subset_size;j++) {
3321: for (k=j;k<subset_size;k++) {
3322: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3323: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3324: }
3325: }
3326: } else {
3327: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3328: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3329: }
3330: } else {
3331: S = Sarray + cumarray;
3332: St = Starray + cumarray;
3333: }
3334: /* see if we can save some work */
3335: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3336: PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3337: }
3339: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3340: B_neigs = 0;
3341: } else {
3342: if (sub_schurs->is_symmetric) {
3343: PetscBLASInt B_itype = 1;
3344: PetscBLASInt B_IL, B_IU;
3345: PetscReal eps = -1.0; /* dlamch? */
3346: PetscInt nmin_s;
3347: PetscBool compute_range;
3349: B_neigs = 0;
3350: compute_range = (PetscBool)!same_data;
3351: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3353: if (pcbddc->dbg_flag) {
3354: PetscInt nc = 0;
3356: if (sub_schurs->change_primal_sub) {
3357: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3358: }
3359: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3360: }
3362: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3363: if (compute_range) {
3365: /* ask for eigenvalues larger than thresh */
3366: if (sub_schurs->is_posdef) {
3367: #if defined(PETSC_USE_COMPLEX)
3368: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3369: #else
3370: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3371: #endif
3372: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3373: } else { /* no theory so far, but it works nicely */
3374: PetscInt recipe = 0,recipe_m = 1;
3375: PetscReal bb[2];
3377: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3378: switch (recipe) {
3379: case 0:
3380: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3381: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3382: #if defined(PETSC_USE_COMPLEX)
3383: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3384: #else
3385: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3386: #endif
3387: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3388: break;
3389: case 1:
3390: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3391: #if defined(PETSC_USE_COMPLEX)
3392: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3393: #else
3394: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3395: #endif
3396: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3397: if (!scal) {
3398: PetscBLASInt B_neigs2 = 0;
3400: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3401: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3402: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3403: #if defined(PETSC_USE_COMPLEX)
3404: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3405: #else
3406: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3407: #endif
3408: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3409: B_neigs += B_neigs2;
3410: }
3411: break;
3412: case 2:
3413: if (scal) {
3414: bb[0] = PETSC_MIN_REAL;
3415: bb[1] = 0;
3416: #if defined(PETSC_USE_COMPLEX)
3417: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3418: #else
3419: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3420: #endif
3421: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3422: } else {
3423: PetscBLASInt B_neigs2 = 0;
3424: PetscBool import = PETSC_FALSE;
3426: lthresh = PetscMax(lthresh,0.0);
3427: if (lthresh > 0.0) {
3428: bb[0] = PETSC_MIN_REAL;
3429: bb[1] = lthresh*lthresh;
3431: import = PETSC_TRUE;
3432: #if defined(PETSC_USE_COMPLEX)
3433: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3434: #else
3435: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3436: #endif
3437: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3438: }
3439: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3440: bb[1] = PETSC_MAX_REAL;
3441: if (import) {
3442: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3443: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3444: }
3445: #if defined(PETSC_USE_COMPLEX)
3446: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3447: #else
3448: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3449: #endif
3450: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3451: B_neigs += B_neigs2;
3452: }
3453: break;
3454: case 3:
3455: if (scal) {
3456: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3457: } else {
3458: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3459: }
3460: if (!scal) {
3461: bb[0] = uthresh;
3462: bb[1] = PETSC_MAX_REAL;
3463: #if defined(PETSC_USE_COMPLEX)
3464: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3465: #else
3466: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3467: #endif
3468: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3469: }
3470: if (recipe_m > 0 && B_N - B_neigs > 0) {
3471: PetscBLASInt B_neigs2 = 0;
3473: B_IL = 1;
3474: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3475: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3476: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3477: #if defined(PETSC_USE_COMPLEX)
3478: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3479: #else
3480: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3481: #endif
3482: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3483: B_neigs += B_neigs2;
3484: }
3485: break;
3486: case 4:
3487: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3488: #if defined(PETSC_USE_COMPLEX)
3489: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3490: #else
3491: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3492: #endif
3493: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3494: {
3495: PetscBLASInt B_neigs2 = 0;
3497: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3498: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3499: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3500: #if defined(PETSC_USE_COMPLEX)
3501: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3502: #else
3503: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3504: #endif
3505: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3506: B_neigs += B_neigs2;
3507: }
3508: break;
3509: case 5: /* same as before: first compute all eigenvalues, then filter */
3510: #if defined(PETSC_USE_COMPLEX)
3511: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3512: #else
3513: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3514: #endif
3515: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3516: {
3517: PetscInt e,k,ne;
3518: for (e=0,ne=0;e<B_neigs;e++) {
3519: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3520: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3521: eigs[ne] = eigs[e];
3522: ne++;
3523: }
3524: }
3525: PetscArraycpy(eigv,S,B_N*ne);
3526: B_neigs = ne;
3527: }
3528: break;
3529: default:
3530: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3531: break;
3532: }
3533: }
3534: } else if (!same_data) { /* this is just to see all the eigenvalues */
3535: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3536: B_IL = 1;
3537: #if defined(PETSC_USE_COMPLEX)
3538: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3539: #else
3540: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3541: #endif
3542: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3543: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3544: PetscInt k;
3545: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3546: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3547: PetscBLASIntCast(nmax,&B_neigs);
3548: nmin = nmax;
3549: PetscArrayzero(eigv,subset_size*nmax);
3550: for (k=0;k<nmax;k++) {
3551: eigs[k] = 1./PETSC_SMALL;
3552: eigv[k*(subset_size+1)] = 1.0;
3553: }
3554: }
3555: PetscFPTrapPop();
3556: if (B_ierr) {
3557: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3558: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3559: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3560: }
3562: if (B_neigs > nmax) {
3563: if (pcbddc->dbg_flag) {
3564: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3565: }
3566: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3567: B_neigs = nmax;
3568: }
3570: nmin_s = PetscMin(nmin,B_N);
3571: if (B_neigs < nmin_s) {
3572: PetscBLASInt B_neigs2 = 0;
3574: if (pcbddc->use_deluxe_scaling) {
3575: if (scal) {
3576: B_IU = nmin_s;
3577: B_IL = B_neigs + 1;
3578: } else {
3579: B_IL = B_N - nmin_s + 1;
3580: B_IU = B_N - B_neigs;
3581: }
3582: } else {
3583: B_IL = B_neigs + 1;
3584: B_IU = nmin_s;
3585: }
3586: if (pcbddc->dbg_flag) {
3587: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3588: }
3589: if (sub_schurs->is_symmetric) {
3590: PetscInt j,k;
3591: for (j=0;j<subset_size;j++) {
3592: for (k=j;k<subset_size;k++) {
3593: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3594: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3595: }
3596: }
3597: } else {
3598: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3599: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3600: }
3601: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3602: #if defined(PETSC_USE_COMPLEX)
3603: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3604: #else
3605: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3606: #endif
3607: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3608: PetscFPTrapPop();
3609: B_neigs += B_neigs2;
3610: }
3611: if (B_ierr) {
3612: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3613: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3614: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3615: }
3616: if (pcbddc->dbg_flag) {
3617: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3618: for (j=0;j<B_neigs;j++) {
3619: if (eigs[j] == 0.0) {
3620: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3621: } else {
3622: if (pcbddc->use_deluxe_scaling) {
3623: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3624: } else {
3625: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3626: }
3627: }
3628: }
3629: }
3630: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3631: }
3632: /* change the basis back to the original one */
3633: if (sub_schurs->change) {
3634: Mat change,phi,phit;
3636: if (pcbddc->dbg_flag > 2) {
3637: PetscInt ii;
3638: for (ii=0;ii<B_neigs;ii++) {
3639: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3640: for (j=0;j<B_N;j++) {
3641: #if defined(PETSC_USE_COMPLEX)
3642: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3643: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3644: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3645: #else
3646: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3647: #endif
3648: }
3649: }
3650: }
3651: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3652: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3653: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3654: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3655: MatDestroy(&phit);
3656: MatDestroy(&phi);
3657: }
3658: maxneigs = PetscMax(B_neigs,maxneigs);
3659: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3660: if (B_neigs) {
3661: PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);
3663: if (pcbddc->dbg_flag > 1) {
3664: PetscInt ii;
3665: for (ii=0;ii<B_neigs;ii++) {
3666: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3667: for (j=0;j<B_N;j++) {
3668: #if defined(PETSC_USE_COMPLEX)
3669: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3670: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3671: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3672: #else
3673: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3674: #endif
3675: }
3676: }
3677: }
3678: PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3679: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3680: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3681: cum++;
3682: }
3683: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3684: /* shift for next computation */
3685: cumarray += subset_size*subset_size;
3686: }
3687: if (pcbddc->dbg_flag) {
3688: PetscViewerFlush(pcbddc->dbg_viewer);
3689: }
3691: if (mss) {
3692: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3693: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3694: /* destroy matrices (junk) */
3695: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3696: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3697: }
3698: if (allocated_S_St) {
3699: PetscFree2(S,St);
3700: }
3701: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3702: #if defined(PETSC_USE_COMPLEX)
3703: PetscFree(rwork);
3704: #endif
3705: if (pcbddc->dbg_flag) {
3706: PetscInt maxneigs_r;
3707: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3708: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3709: }
3710: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3711: return(0);
3712: }
3714: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3715: {
3716: PetscScalar *coarse_submat_vals;
3720: /* Setup local scatters R_to_B and (optionally) R_to_D */
3721: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3722: PCBDDCSetUpLocalScatters(pc);
3724: /* Setup local neumann solver ksp_R */
3725: /* PCBDDCSetUpLocalScatters should be called first! */
3726: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3728: /*
3729: Setup local correction and local part of coarse basis.
3730: Gives back the dense local part of the coarse matrix in column major ordering
3731: */
3732: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3734: /* Compute total number of coarse nodes and setup coarse solver */
3735: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3737: /* free */
3738: PetscFree(coarse_submat_vals);
3739: return(0);
3740: }
3742: PetscErrorCode PCBDDCResetCustomization(PC pc)
3743: {
3744: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3748: ISDestroy(&pcbddc->user_primal_vertices);
3749: ISDestroy(&pcbddc->user_primal_vertices_local);
3750: ISDestroy(&pcbddc->NeumannBoundaries);
3751: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3752: ISDestroy(&pcbddc->DirichletBoundaries);
3753: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3754: PetscFree(pcbddc->onearnullvecs_state);
3755: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3756: PCBDDCSetDofsSplitting(pc,0,NULL);
3757: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3758: return(0);
3759: }
3761: PetscErrorCode PCBDDCResetTopography(PC pc)
3762: {
3763: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3764: PetscInt i;
3768: MatDestroy(&pcbddc->nedcG);
3769: ISDestroy(&pcbddc->nedclocal);
3770: MatDestroy(&pcbddc->discretegradient);
3771: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3772: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3773: MatDestroy(&pcbddc->switch_static_change);
3774: VecDestroy(&pcbddc->work_change);
3775: MatDestroy(&pcbddc->ConstraintMatrix);
3776: MatDestroy(&pcbddc->divudotp);
3777: ISDestroy(&pcbddc->divudotp_vl2l);
3778: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3779: for (i=0;i<pcbddc->n_local_subs;i++) {
3780: ISDestroy(&pcbddc->local_subs[i]);
3781: }
3782: pcbddc->n_local_subs = 0;
3783: PetscFree(pcbddc->local_subs);
3784: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3785: pcbddc->graphanalyzed = PETSC_FALSE;
3786: pcbddc->recompute_topography = PETSC_TRUE;
3787: pcbddc->corner_selected = PETSC_FALSE;
3788: return(0);
3789: }
3791: PetscErrorCode PCBDDCResetSolvers(PC pc)
3792: {
3793: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3797: VecDestroy(&pcbddc->coarse_vec);
3798: if (pcbddc->coarse_phi_B) {
3799: PetscScalar *array;
3800: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3801: PetscFree(array);
3802: }
3803: MatDestroy(&pcbddc->coarse_phi_B);
3804: MatDestroy(&pcbddc->coarse_phi_D);
3805: MatDestroy(&pcbddc->coarse_psi_B);
3806: MatDestroy(&pcbddc->coarse_psi_D);
3807: VecDestroy(&pcbddc->vec1_P);
3808: VecDestroy(&pcbddc->vec1_C);
3809: MatDestroy(&pcbddc->local_auxmat2);
3810: MatDestroy(&pcbddc->local_auxmat1);
3811: VecDestroy(&pcbddc->vec1_R);
3812: VecDestroy(&pcbddc->vec2_R);
3813: ISDestroy(&pcbddc->is_R_local);
3814: VecScatterDestroy(&pcbddc->R_to_B);
3815: VecScatterDestroy(&pcbddc->R_to_D);
3816: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3817: KSPReset(pcbddc->ksp_D);
3818: KSPReset(pcbddc->ksp_R);
3819: KSPReset(pcbddc->coarse_ksp);
3820: MatDestroy(&pcbddc->local_mat);
3821: PetscFree(pcbddc->primal_indices_local_idxs);
3822: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3823: PetscFree(pcbddc->global_primal_indices);
3824: ISDestroy(&pcbddc->coarse_subassembling);
3825: MatDestroy(&pcbddc->benign_change);
3826: VecDestroy(&pcbddc->benign_vec);
3827: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3828: MatDestroy(&pcbddc->benign_B0);
3829: PetscSFDestroy(&pcbddc->benign_sf);
3830: if (pcbddc->benign_zerodiag_subs) {
3831: PetscInt i;
3832: for (i=0;i<pcbddc->benign_n;i++) {
3833: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3834: }
3835: PetscFree(pcbddc->benign_zerodiag_subs);
3836: }
3837: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3838: return(0);
3839: }
3841: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3842: {
3843: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3844: PC_IS *pcis = (PC_IS*)pc->data;
3845: VecType impVecType;
3846: PetscInt n_constraints,n_R,old_size;
3850: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3851: n_R = pcis->n - pcbddc->n_vertices;
3852: VecGetType(pcis->vec1_N,&impVecType);
3853: /* local work vectors (try to avoid unneeded work)*/
3854: /* R nodes */
3855: old_size = -1;
3856: if (pcbddc->vec1_R) {
3857: VecGetSize(pcbddc->vec1_R,&old_size);
3858: }
3859: if (n_R != old_size) {
3860: VecDestroy(&pcbddc->vec1_R);
3861: VecDestroy(&pcbddc->vec2_R);
3862: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3863: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3864: VecSetType(pcbddc->vec1_R,impVecType);
3865: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3866: }
3867: /* local primal dofs */
3868: old_size = -1;
3869: if (pcbddc->vec1_P) {
3870: VecGetSize(pcbddc->vec1_P,&old_size);
3871: }
3872: if (pcbddc->local_primal_size != old_size) {
3873: VecDestroy(&pcbddc->vec1_P);
3874: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3875: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3876: VecSetType(pcbddc->vec1_P,impVecType);
3877: }
3878: /* local explicit constraints */
3879: old_size = -1;
3880: if (pcbddc->vec1_C) {
3881: VecGetSize(pcbddc->vec1_C,&old_size);
3882: }
3883: if (n_constraints && n_constraints != old_size) {
3884: VecDestroy(&pcbddc->vec1_C);
3885: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3886: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3887: VecSetType(pcbddc->vec1_C,impVecType);
3888: }
3889: return(0);
3890: }
3892: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3893: {
3894: PetscErrorCode ierr;
3895: /* pointers to pcis and pcbddc */
3896: PC_IS* pcis = (PC_IS*)pc->data;
3897: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3898: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3899: /* submatrices of local problem */
3900: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3901: /* submatrices of local coarse problem */
3902: Mat S_VV,S_CV,S_VC,S_CC;
3903: /* working matrices */
3904: Mat C_CR;
3905: /* additional working stuff */
3906: PC pc_R;
3907: Mat F,Brhs = NULL;
3908: Vec dummy_vec;
3909: PetscBool isLU,isCHOL,need_benign_correction,sparserhs;
3910: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3911: PetscScalar *work;
3912: PetscInt *idx_V_B;
3913: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3914: PetscInt i,n_R,n_D,n_B;
3915: PetscScalar one=1.0,m_one=-1.0;
3918: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3919: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3921: /* Set Non-overlapping dimensions */
3922: n_vertices = pcbddc->n_vertices;
3923: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3924: n_B = pcis->n_B;
3925: n_D = pcis->n - n_B;
3926: n_R = pcis->n - n_vertices;
3928: /* vertices in boundary numbering */
3929: PetscMalloc1(n_vertices,&idx_V_B);
3930: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3931: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3933: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3934: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3935: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3936: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3937: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3938: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3939: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3940: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3941: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3942: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3944: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3945: KSPGetPC(pcbddc->ksp_R,&pc_R);
3946: PCSetUp(pc_R);
3947: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3948: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3949: lda_rhs = n_R;
3950: need_benign_correction = PETSC_FALSE;
3951: if (isLU || isCHOL) {
3952: PCFactorGetMatrix(pc_R,&F);
3953: } else if (sub_schurs && sub_schurs->reuse_solver) {
3954: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3955: MatFactorType type;
3957: F = reuse_solver->F;
3958: MatGetFactorType(F,&type);
3959: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3960: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3961: MatGetSize(F,&lda_rhs,NULL);
3962: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3963: } else F = NULL;
3965: /* determine if we can use a sparse right-hand side */
3966: sparserhs = PETSC_FALSE;
3967: if (F) {
3968: MatSolverType solver;
3970: MatFactorGetSolverType(F,&solver);
3971: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3972: }
3974: /* allocate workspace */
3975: n = 0;
3976: if (n_constraints) {
3977: n += lda_rhs*n_constraints;
3978: }
3979: if (n_vertices) {
3980: n = PetscMax(2*lda_rhs*n_vertices,n);
3981: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3982: }
3983: if (!pcbddc->symmetric_primal) {
3984: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3985: }
3986: PetscMalloc1(n,&work);
3988: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3989: dummy_vec = NULL;
3990: if (need_benign_correction && lda_rhs != n_R && F) {
3991: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3992: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3993: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3994: }
3996: MatDestroy(&pcbddc->local_auxmat1);
3997: MatDestroy(&pcbddc->local_auxmat2);
3999: /* Precompute stuffs needed for preprocessing and application of BDDC*/
4000: if (n_constraints) {
4001: Mat M3,C_B;
4002: IS is_aux;
4003: PetscScalar *array,*array2;
4005: /* Extract constraints on R nodes: C_{CR} */
4006: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4007: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4008: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4010: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4011: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4012: if (!sparserhs) {
4013: PetscArrayzero(work,lda_rhs*n_constraints);
4014: for (i=0;i<n_constraints;i++) {
4015: const PetscScalar *row_cmat_values;
4016: const PetscInt *row_cmat_indices;
4017: PetscInt size_of_constraint,j;
4019: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4020: for (j=0;j<size_of_constraint;j++) {
4021: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4022: }
4023: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4024: }
4025: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4026: } else {
4027: Mat tC_CR;
4029: MatScale(C_CR,-1.0);
4030: if (lda_rhs != n_R) {
4031: PetscScalar *aa;
4032: PetscInt r,*ii,*jj;
4033: PetscBool done;
4035: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4036: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4037: MatSeqAIJGetArray(C_CR,&aa);
4038: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4039: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4040: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4041: } else {
4042: PetscObjectReference((PetscObject)C_CR);
4043: tC_CR = C_CR;
4044: }
4045: MatCreateTranspose(tC_CR,&Brhs);
4046: MatDestroy(&tC_CR);
4047: }
4048: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4049: if (F) {
4050: if (need_benign_correction) {
4051: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4053: /* rhs is already zero on interior dofs, no need to change the rhs */
4054: PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4055: }
4056: MatMatSolve(F,Brhs,local_auxmat2_R);
4057: if (need_benign_correction) {
4058: PetscScalar *marr;
4059: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4061: MatDenseGetArray(local_auxmat2_R,&marr);
4062: if (lda_rhs != n_R) {
4063: for (i=0;i<n_constraints;i++) {
4064: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4065: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4066: VecResetArray(dummy_vec);
4067: }
4068: } else {
4069: for (i=0;i<n_constraints;i++) {
4070: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4071: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4072: VecResetArray(pcbddc->vec1_R);
4073: }
4074: }
4075: MatDenseRestoreArray(local_auxmat2_R,&marr);
4076: }
4077: } else {
4078: PetscScalar *marr;
4080: MatDenseGetArray(local_auxmat2_R,&marr);
4081: for (i=0;i<n_constraints;i++) {
4082: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4083: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4084: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4085: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4086: VecResetArray(pcbddc->vec1_R);
4087: VecResetArray(pcbddc->vec2_R);
4088: }
4089: MatDenseRestoreArray(local_auxmat2_R,&marr);
4090: }
4091: if (sparserhs) {
4092: MatScale(C_CR,-1.0);
4093: }
4094: MatDestroy(&Brhs);
4095: if (!pcbddc->switch_static) {
4096: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4097: MatDenseGetArray(pcbddc->local_auxmat2,&array);
4098: MatDenseGetArray(local_auxmat2_R,&array2);
4099: for (i=0;i<n_constraints;i++) {
4100: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4101: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4102: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4103: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4104: VecResetArray(pcis->vec1_B);
4105: VecResetArray(pcbddc->vec1_R);
4106: }
4107: MatDenseRestoreArray(local_auxmat2_R,&array2);
4108: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4109: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4110: } else {
4111: if (lda_rhs != n_R) {
4112: IS dummy;
4114: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4115: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4116: ISDestroy(&dummy);
4117: } else {
4118: PetscObjectReference((PetscObject)local_auxmat2_R);
4119: pcbddc->local_auxmat2 = local_auxmat2_R;
4120: }
4121: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4122: }
4123: ISDestroy(&is_aux);
4124: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
4125: MatScale(M3,m_one);
4126: if (isCHOL) {
4127: MatCholeskyFactor(M3,NULL,NULL);
4128: } else {
4129: MatLUFactor(M3,NULL,NULL,NULL);
4130: }
4131: MatSeqDenseInvertFactors_Private(M3);
4132: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4133: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4134: MatDestroy(&C_B);
4135: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4136: MatDestroy(&M3);
4137: }
4139: /* Get submatrices from subdomain matrix */
4140: if (n_vertices) {
4141: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4142: PetscBool oldpin;
4143: #endif
4144: PetscBool isaij;
4145: IS is_aux;
4147: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4148: IS tis;
4150: ISDuplicate(pcbddc->is_R_local,&tis);
4151: ISSort(tis);
4152: ISComplement(tis,0,pcis->n,&is_aux);
4153: ISDestroy(&tis);
4154: } else {
4155: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4156: }
4157: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4158: oldpin = pcbddc->local_mat->pinnedtocpu;
4159: #endif
4160: MatPinToCPU(pcbddc->local_mat,PETSC_TRUE);
4161: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4162: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4163: PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4164: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4165: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4166: }
4167: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4168: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4169: MatPinToCPU(pcbddc->local_mat,oldpin);
4170: #endif
4171: ISDestroy(&is_aux);
4172: }
4174: /* Matrix of coarse basis functions (local) */
4175: if (pcbddc->coarse_phi_B) {
4176: PetscInt on_B,on_primal,on_D=n_D;
4177: if (pcbddc->coarse_phi_D) {
4178: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4179: }
4180: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4181: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4182: PetscScalar *marray;
4184: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4185: PetscFree(marray);
4186: MatDestroy(&pcbddc->coarse_phi_B);
4187: MatDestroy(&pcbddc->coarse_psi_B);
4188: MatDestroy(&pcbddc->coarse_phi_D);
4189: MatDestroy(&pcbddc->coarse_psi_D);
4190: }
4191: }
4193: if (!pcbddc->coarse_phi_B) {
4194: PetscScalar *marr;
4196: /* memory size */
4197: n = n_B*pcbddc->local_primal_size;
4198: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4199: if (!pcbddc->symmetric_primal) n *= 2;
4200: PetscCalloc1(n,&marr);
4201: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4202: marr += n_B*pcbddc->local_primal_size;
4203: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4204: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4205: marr += n_D*pcbddc->local_primal_size;
4206: }
4207: if (!pcbddc->symmetric_primal) {
4208: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4209: marr += n_B*pcbddc->local_primal_size;
4210: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4211: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4212: }
4213: } else {
4214: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4215: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4216: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4217: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4218: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4219: }
4220: }
4221: }
4223: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4224: p0_lidx_I = NULL;
4225: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4226: const PetscInt *idxs;
4228: ISGetIndices(pcis->is_I_local,&idxs);
4229: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4230: for (i=0;i<pcbddc->benign_n;i++) {
4231: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4232: }
4233: ISRestoreIndices(pcis->is_I_local,&idxs);
4234: }
4236: /* vertices */
4237: if (n_vertices) {
4238: PetscBool restoreavr = PETSC_FALSE;
4240: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4242: if (n_R) {
4243: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4244: PetscBLASInt B_N,B_one = 1;
4245: const PetscScalar *x;
4246: PetscScalar *y;
4248: MatScale(A_RV,m_one);
4249: if (need_benign_correction) {
4250: ISLocalToGlobalMapping RtoN;
4251: IS is_p0;
4252: PetscInt *idxs_p0,n;
4254: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4255: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4256: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4257: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4258: ISLocalToGlobalMappingDestroy(&RtoN);
4259: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4260: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4261: ISDestroy(&is_p0);
4262: }
4264: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4265: if (!sparserhs || need_benign_correction) {
4266: if (lda_rhs == n_R) {
4267: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4268: } else {
4269: PetscScalar *av,*array;
4270: const PetscInt *xadj,*adjncy;
4271: PetscInt n;
4272: PetscBool flg_row;
4274: array = work+lda_rhs*n_vertices;
4275: PetscArrayzero(array,lda_rhs*n_vertices);
4276: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4277: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4278: MatSeqAIJGetArray(A_RV,&av);
4279: for (i=0;i<n;i++) {
4280: PetscInt j;
4281: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4282: }
4283: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4284: MatDestroy(&A_RV);
4285: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4286: }
4287: if (need_benign_correction) {
4288: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4289: PetscScalar *marr;
4291: MatDenseGetArray(A_RV,&marr);
4292: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4294: | 0 0 0 | (V)
4295: L = | 0 0 -1 | (P-p0)
4296: | 0 0 -1 | (p0)
4298: */
4299: for (i=0;i<reuse_solver->benign_n;i++) {
4300: const PetscScalar *vals;
4301: const PetscInt *idxs,*idxs_zero;
4302: PetscInt n,j,nz;
4304: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4305: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4306: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4307: for (j=0;j<n;j++) {
4308: PetscScalar val = vals[j];
4309: PetscInt k,col = idxs[j];
4310: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4311: }
4312: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4313: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4314: }
4315: MatDenseRestoreArray(A_RV,&marr);
4316: }
4317: PetscObjectReference((PetscObject)A_RV);
4318: Brhs = A_RV;
4319: } else {
4320: Mat tA_RVT,A_RVT;
4322: if (!pcbddc->symmetric_primal) {
4323: /* A_RV already scaled by -1 */
4324: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4325: } else {
4326: restoreavr = PETSC_TRUE;
4327: MatScale(A_VR,-1.0);
4328: PetscObjectReference((PetscObject)A_VR);
4329: A_RVT = A_VR;
4330: }
4331: if (lda_rhs != n_R) {
4332: PetscScalar *aa;
4333: PetscInt r,*ii,*jj;
4334: PetscBool done;
4336: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4337: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4338: MatSeqAIJGetArray(A_RVT,&aa);
4339: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4340: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4341: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4342: } else {
4343: PetscObjectReference((PetscObject)A_RVT);
4344: tA_RVT = A_RVT;
4345: }
4346: MatCreateTranspose(tA_RVT,&Brhs);
4347: MatDestroy(&tA_RVT);
4348: MatDestroy(&A_RVT);
4349: }
4350: if (F) {
4351: /* need to correct the rhs */
4352: if (need_benign_correction) {
4353: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4354: PetscScalar *marr;
4356: MatDenseGetArray(Brhs,&marr);
4357: if (lda_rhs != n_R) {
4358: for (i=0;i<n_vertices;i++) {
4359: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4360: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4361: VecResetArray(dummy_vec);
4362: }
4363: } else {
4364: for (i=0;i<n_vertices;i++) {
4365: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4366: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4367: VecResetArray(pcbddc->vec1_R);
4368: }
4369: }
4370: MatDenseRestoreArray(Brhs,&marr);
4371: }
4372: MatMatSolve(F,Brhs,A_RRmA_RV);
4373: if (restoreavr) {
4374: MatScale(A_VR,-1.0);
4375: }
4376: /* need to correct the solution */
4377: if (need_benign_correction) {
4378: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4379: PetscScalar *marr;
4381: MatDenseGetArray(A_RRmA_RV,&marr);
4382: if (lda_rhs != n_R) {
4383: for (i=0;i<n_vertices;i++) {
4384: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4385: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4386: VecResetArray(dummy_vec);
4387: }
4388: } else {
4389: for (i=0;i<n_vertices;i++) {
4390: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4391: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4392: VecResetArray(pcbddc->vec1_R);
4393: }
4394: }
4395: MatDenseRestoreArray(A_RRmA_RV,&marr);
4396: }
4397: } else {
4398: MatDenseGetArray(Brhs,&y);
4399: for (i=0;i<n_vertices;i++) {
4400: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4401: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4402: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4403: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4404: VecResetArray(pcbddc->vec1_R);
4405: VecResetArray(pcbddc->vec2_R);
4406: }
4407: MatDenseRestoreArray(Brhs,&y);
4408: }
4409: MatDestroy(&A_RV);
4410: MatDestroy(&Brhs);
4411: /* S_VV and S_CV */
4412: if (n_constraints) {
4413: Mat B;
4415: PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4416: for (i=0;i<n_vertices;i++) {
4417: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4418: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4419: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4420: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4421: VecResetArray(pcis->vec1_B);
4422: VecResetArray(pcbddc->vec1_R);
4423: }
4424: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4425: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4426: MatDestroy(&B);
4427: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4428: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4429: MatScale(S_CV,m_one);
4430: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4431: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4432: MatDestroy(&B);
4433: }
4434: if (lda_rhs != n_R) {
4435: MatDestroy(&A_RRmA_RV);
4436: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4437: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4438: }
4439: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4440: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4441: if (need_benign_correction) {
4442: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4443: PetscScalar *marr,*sums;
4445: PetscMalloc1(n_vertices,&sums);
4446: MatDenseGetArray(S_VVt,&marr);
4447: for (i=0;i<reuse_solver->benign_n;i++) {
4448: const PetscScalar *vals;
4449: const PetscInt *idxs,*idxs_zero;
4450: PetscInt n,j,nz;
4452: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4453: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4454: for (j=0;j<n_vertices;j++) {
4455: PetscInt k;
4456: sums[j] = 0.;
4457: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4458: }
4459: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4460: for (j=0;j<n;j++) {
4461: PetscScalar val = vals[j];
4462: PetscInt k;
4463: for (k=0;k<n_vertices;k++) {
4464: marr[idxs[j]+k*n_vertices] += val*sums[k];
4465: }
4466: }
4467: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4468: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4469: }
4470: PetscFree(sums);
4471: MatDenseRestoreArray(S_VVt,&marr);
4472: MatDestroy(&A_RV_bcorr);
4473: }
4474: MatDestroy(&A_RRmA_RV);
4475: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4476: MatDenseGetArrayRead(A_VV,&x);
4477: MatDenseGetArray(S_VVt,&y);
4478: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4479: MatDenseRestoreArrayRead(A_VV,&x);
4480: MatDenseRestoreArray(S_VVt,&y);
4481: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4482: MatDestroy(&S_VVt);
4483: } else {
4484: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4485: }
4486: MatDestroy(&A_VV);
4488: /* coarse basis functions */
4489: for (i=0;i<n_vertices;i++) {
4490: PetscScalar *y;
4492: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4493: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4494: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4495: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4496: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4497: y[n_B*i+idx_V_B[i]] = 1.0;
4498: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4499: VecResetArray(pcis->vec1_B);
4501: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4502: PetscInt j;
4504: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4505: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4506: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4507: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4508: VecResetArray(pcis->vec1_D);
4509: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4510: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4511: }
4512: VecResetArray(pcbddc->vec1_R);
4513: }
4514: /* if n_R == 0 the object is not destroyed */
4515: MatDestroy(&A_RV);
4516: }
4517: VecDestroy(&dummy_vec);
4519: if (n_constraints) {
4520: Mat B;
4522: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4523: MatScale(S_CC,m_one);
4524: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4525: MatScale(S_CC,m_one);
4526: if (n_vertices) {
4527: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4528: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4529: } else {
4530: Mat S_VCt;
4532: if (lda_rhs != n_R) {
4533: MatDestroy(&B);
4534: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4535: MatSeqDenseSetLDA(B,lda_rhs);
4536: }
4537: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4538: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4539: MatDestroy(&S_VCt);
4540: }
4541: }
4542: MatDestroy(&B);
4543: /* coarse basis functions */
4544: for (i=0;i<n_constraints;i++) {
4545: PetscScalar *y;
4547: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4548: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4549: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4550: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4551: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4552: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4553: VecResetArray(pcis->vec1_B);
4554: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4555: PetscInt j;
4557: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4558: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4559: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4560: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4561: VecResetArray(pcis->vec1_D);
4562: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4563: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4564: }
4565: VecResetArray(pcbddc->vec1_R);
4566: }
4567: }
4568: if (n_constraints) {
4569: MatDestroy(&local_auxmat2_R);
4570: }
4571: PetscFree(p0_lidx_I);
4573: /* coarse matrix entries relative to B_0 */
4574: if (pcbddc->benign_n) {
4575: Mat B0_B,B0_BPHI;
4576: IS is_dummy;
4577: const PetscScalar *data;
4578: PetscInt j;
4580: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4581: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4582: ISDestroy(&is_dummy);
4583: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4584: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4585: MatDenseGetArrayRead(B0_BPHI,&data);
4586: for (j=0;j<pcbddc->benign_n;j++) {
4587: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4588: for (i=0;i<pcbddc->local_primal_size;i++) {
4589: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4590: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4591: }
4592: }
4593: MatDenseRestoreArrayRead(B0_BPHI,&data);
4594: MatDestroy(&B0_B);
4595: MatDestroy(&B0_BPHI);
4596: }
4598: /* compute other basis functions for non-symmetric problems */
4599: if (!pcbddc->symmetric_primal) {
4600: Mat B_V=NULL,B_C=NULL;
4601: PetscScalar *marray;
4603: if (n_constraints) {
4604: Mat S_CCT,C_CRT;
4606: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4607: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4608: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4609: MatDestroy(&S_CCT);
4610: if (n_vertices) {
4611: Mat S_VCT;
4613: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4614: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4615: MatDestroy(&S_VCT);
4616: }
4617: MatDestroy(&C_CRT);
4618: } else {
4619: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4620: }
4621: if (n_vertices && n_R) {
4622: PetscScalar *av,*marray;
4623: const PetscInt *xadj,*adjncy;
4624: PetscInt n;
4625: PetscBool flg_row;
4627: /* B_V = B_V - A_VR^T */
4628: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4629: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4630: MatSeqAIJGetArray(A_VR,&av);
4631: MatDenseGetArray(B_V,&marray);
4632: for (i=0;i<n;i++) {
4633: PetscInt j;
4634: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4635: }
4636: MatDenseRestoreArray(B_V,&marray);
4637: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4638: MatDestroy(&A_VR);
4639: }
4641: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4642: if (n_vertices) {
4643: MatDenseGetArray(B_V,&marray);
4644: for (i=0;i<n_vertices;i++) {
4645: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4646: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4647: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4648: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4649: VecResetArray(pcbddc->vec1_R);
4650: VecResetArray(pcbddc->vec2_R);
4651: }
4652: MatDenseRestoreArray(B_V,&marray);
4653: }
4654: if (B_C) {
4655: MatDenseGetArray(B_C,&marray);
4656: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4657: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4658: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4659: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4660: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4661: VecResetArray(pcbddc->vec1_R);
4662: VecResetArray(pcbddc->vec2_R);
4663: }
4664: MatDenseRestoreArray(B_C,&marray);
4665: }
4666: /* coarse basis functions */
4667: for (i=0;i<pcbddc->local_primal_size;i++) {
4668: PetscScalar *y;
4670: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4671: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4672: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4673: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4674: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4675: if (i<n_vertices) {
4676: y[n_B*i+idx_V_B[i]] = 1.0;
4677: }
4678: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4679: VecResetArray(pcis->vec1_B);
4681: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4682: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4683: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4684: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4685: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4686: VecResetArray(pcis->vec1_D);
4687: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4688: }
4689: VecResetArray(pcbddc->vec1_R);
4690: }
4691: MatDestroy(&B_V);
4692: MatDestroy(&B_C);
4693: }
4695: /* free memory */
4696: PetscFree(idx_V_B);
4697: MatDestroy(&S_VV);
4698: MatDestroy(&S_CV);
4699: MatDestroy(&S_VC);
4700: MatDestroy(&S_CC);
4701: PetscFree(work);
4702: if (n_vertices) {
4703: MatDestroy(&A_VR);
4704: }
4705: if (n_constraints) {
4706: MatDestroy(&C_CR);
4707: }
4708: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4710: /* Checking coarse_sub_mat and coarse basis functions */
4711: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4712: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4713: if (pcbddc->dbg_flag) {
4714: Mat coarse_sub_mat;
4715: Mat AUXMAT,TM1,TM2,TM3,TM4;
4716: Mat coarse_phi_D,coarse_phi_B;
4717: Mat coarse_psi_D,coarse_psi_B;
4718: Mat A_II,A_BB,A_IB,A_BI;
4719: Mat C_B,CPHI;
4720: IS is_dummy;
4721: Vec mones;
4722: MatType checkmattype=MATSEQAIJ;
4723: PetscReal real_value;
4725: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4726: Mat A;
4727: PCBDDCBenignProject(pc,NULL,NULL,&A);
4728: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4729: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4730: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4731: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4732: MatDestroy(&A);
4733: } else {
4734: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4735: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4736: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4737: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4738: }
4739: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4740: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4741: if (!pcbddc->symmetric_primal) {
4742: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4743: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4744: }
4745: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4747: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4748: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4749: PetscViewerFlush(pcbddc->dbg_viewer);
4750: if (!pcbddc->symmetric_primal) {
4751: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4752: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4753: MatDestroy(&AUXMAT);
4754: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4755: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4756: MatDestroy(&AUXMAT);
4757: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4758: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4759: MatDestroy(&AUXMAT);
4760: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4761: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4762: MatDestroy(&AUXMAT);
4763: } else {
4764: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4765: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4766: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4767: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4768: MatDestroy(&AUXMAT);
4769: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4770: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4771: MatDestroy(&AUXMAT);
4772: }
4773: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4774: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4775: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4776: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4777: if (pcbddc->benign_n) {
4778: Mat B0_B,B0_BPHI;
4779: const PetscScalar *data2;
4780: PetscScalar *data;
4781: PetscInt j;
4783: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4784: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4785: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4786: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4787: MatDenseGetArray(TM1,&data);
4788: MatDenseGetArrayRead(B0_BPHI,&data2);
4789: for (j=0;j<pcbddc->benign_n;j++) {
4790: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4791: for (i=0;i<pcbddc->local_primal_size;i++) {
4792: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4793: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4794: }
4795: }
4796: MatDenseRestoreArray(TM1,&data);
4797: MatDenseRestoreArrayRead(B0_BPHI,&data2);
4798: MatDestroy(&B0_B);
4799: ISDestroy(&is_dummy);
4800: MatDestroy(&B0_BPHI);
4801: }
4802: #if 0
4803: {
4804: PetscViewer viewer;
4805: char filename[256];
4806: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4807: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4808: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4809: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4810: MatView(coarse_sub_mat,viewer);
4811: PetscObjectSetName((PetscObject)TM1,"projected");
4812: MatView(TM1,viewer);
4813: if (pcbddc->coarse_phi_B) {
4814: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4815: MatView(pcbddc->coarse_phi_B,viewer);
4816: }
4817: if (pcbddc->coarse_phi_D) {
4818: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4819: MatView(pcbddc->coarse_phi_D,viewer);
4820: }
4821: if (pcbddc->coarse_psi_B) {
4822: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4823: MatView(pcbddc->coarse_psi_B,viewer);
4824: }
4825: if (pcbddc->coarse_psi_D) {
4826: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4827: MatView(pcbddc->coarse_psi_D,viewer);
4828: }
4829: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4830: MatView(pcbddc->local_mat,viewer);
4831: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4832: MatView(pcbddc->ConstraintMatrix,viewer);
4833: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4834: ISView(pcis->is_I_local,viewer);
4835: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4836: ISView(pcis->is_B_local,viewer);
4837: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4838: ISView(pcbddc->is_R_local,viewer);
4839: PetscViewerDestroy(&viewer);
4840: }
4841: #endif
4842: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4843: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4844: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4845: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4847: /* check constraints */
4848: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4849: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4850: if (!pcbddc->benign_n) { /* TODO: add benign case */
4851: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4852: } else {
4853: PetscScalar *data;
4854: Mat tmat;
4855: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4856: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4857: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4858: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4859: MatDestroy(&tmat);
4860: }
4861: MatCreateVecs(CPHI,&mones,NULL);
4862: VecSet(mones,-1.0);
4863: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4864: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4865: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4866: if (!pcbddc->symmetric_primal) {
4867: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4868: VecSet(mones,-1.0);
4869: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4870: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4871: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4872: }
4873: MatDestroy(&C_B);
4874: MatDestroy(&CPHI);
4875: ISDestroy(&is_dummy);
4876: VecDestroy(&mones);
4877: PetscViewerFlush(pcbddc->dbg_viewer);
4878: MatDestroy(&A_II);
4879: MatDestroy(&A_BB);
4880: MatDestroy(&A_IB);
4881: MatDestroy(&A_BI);
4882: MatDestroy(&TM1);
4883: MatDestroy(&TM2);
4884: MatDestroy(&TM3);
4885: MatDestroy(&TM4);
4886: MatDestroy(&coarse_phi_D);
4887: MatDestroy(&coarse_phi_B);
4888: if (!pcbddc->symmetric_primal) {
4889: MatDestroy(&coarse_psi_D);
4890: MatDestroy(&coarse_psi_B);
4891: }
4892: MatDestroy(&coarse_sub_mat);
4893: }
4894: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors) */
4895: {
4896: PetscBool gpu;
4898: PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4899: if (gpu) {
4900: if (pcbddc->local_auxmat1) {
4901: MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4902: }
4903: if (pcbddc->local_auxmat2) {
4904: MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4905: }
4906: if (pcbddc->coarse_phi_B) {
4907: MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4908: }
4909: if (pcbddc->coarse_phi_D) {
4910: MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4911: }
4912: if (pcbddc->coarse_psi_B) {
4913: MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4914: }
4915: if (pcbddc->coarse_psi_D) {
4916: MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4917: }
4918: }
4919: }
4920: /* get back data */
4921: *coarse_submat_vals_n = coarse_submat_vals;
4922: return(0);
4923: }
4925: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4926: {
4927: Mat *work_mat;
4928: IS isrow_s,iscol_s;
4929: PetscBool rsorted,csorted;
4930: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4934: ISSorted(isrow,&rsorted);
4935: ISSorted(iscol,&csorted);
4936: ISGetLocalSize(isrow,&rsize);
4937: ISGetLocalSize(iscol,&csize);
4939: if (!rsorted) {
4940: const PetscInt *idxs;
4941: PetscInt *idxs_sorted,i;
4943: PetscMalloc1(rsize,&idxs_perm_r);
4944: PetscMalloc1(rsize,&idxs_sorted);
4945: for (i=0;i<rsize;i++) {
4946: idxs_perm_r[i] = i;
4947: }
4948: ISGetIndices(isrow,&idxs);
4949: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4950: for (i=0;i<rsize;i++) {
4951: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4952: }
4953: ISRestoreIndices(isrow,&idxs);
4954: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4955: } else {
4956: PetscObjectReference((PetscObject)isrow);
4957: isrow_s = isrow;
4958: }
4960: if (!csorted) {
4961: if (isrow == iscol) {
4962: PetscObjectReference((PetscObject)isrow_s);
4963: iscol_s = isrow_s;
4964: } else {
4965: const PetscInt *idxs;
4966: PetscInt *idxs_sorted,i;
4968: PetscMalloc1(csize,&idxs_perm_c);
4969: PetscMalloc1(csize,&idxs_sorted);
4970: for (i=0;i<csize;i++) {
4971: idxs_perm_c[i] = i;
4972: }
4973: ISGetIndices(iscol,&idxs);
4974: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4975: for (i=0;i<csize;i++) {
4976: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4977: }
4978: ISRestoreIndices(iscol,&idxs);
4979: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4980: }
4981: } else {
4982: PetscObjectReference((PetscObject)iscol);
4983: iscol_s = iscol;
4984: }
4986: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
4988: if (!rsorted || !csorted) {
4989: Mat new_mat;
4990: IS is_perm_r,is_perm_c;
4992: if (!rsorted) {
4993: PetscInt *idxs_r,i;
4994: PetscMalloc1(rsize,&idxs_r);
4995: for (i=0;i<rsize;i++) {
4996: idxs_r[idxs_perm_r[i]] = i;
4997: }
4998: PetscFree(idxs_perm_r);
4999: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
5000: } else {
5001: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
5002: }
5003: ISSetPermutation(is_perm_r);
5005: if (!csorted) {
5006: if (isrow_s == iscol_s) {
5007: PetscObjectReference((PetscObject)is_perm_r);
5008: is_perm_c = is_perm_r;
5009: } else {
5010: PetscInt *idxs_c,i;
5011: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5012: PetscMalloc1(csize,&idxs_c);
5013: for (i=0;i<csize;i++) {
5014: idxs_c[idxs_perm_c[i]] = i;
5015: }
5016: PetscFree(idxs_perm_c);
5017: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5018: }
5019: } else {
5020: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5021: }
5022: ISSetPermutation(is_perm_c);
5024: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5025: MatDestroy(&work_mat[0]);
5026: work_mat[0] = new_mat;
5027: ISDestroy(&is_perm_r);
5028: ISDestroy(&is_perm_c);
5029: }
5031: PetscObjectReference((PetscObject)work_mat[0]);
5032: *B = work_mat[0];
5033: MatDestroyMatrices(1,&work_mat);
5034: ISDestroy(&isrow_s);
5035: ISDestroy(&iscol_s);
5036: return(0);
5037: }
5039: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5040: {
5041: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5042: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5043: Mat new_mat,lA;
5044: IS is_local,is_global;
5045: PetscInt local_size;
5046: PetscBool isseqaij;
5050: MatDestroy(&pcbddc->local_mat);
5051: MatGetSize(matis->A,&local_size,NULL);
5052: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5053: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5054: ISDestroy(&is_local);
5055: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5056: ISDestroy(&is_global);
5058: if (pcbddc->dbg_flag) {
5059: Vec x,x_change;
5060: PetscReal error;
5062: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5063: VecSetRandom(x,NULL);
5064: MatMult(ChangeOfBasisMatrix,x,x_change);
5065: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5066: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5067: MatMult(new_mat,matis->x,matis->y);
5068: if (!pcbddc->change_interior) {
5069: const PetscScalar *x,*y,*v;
5070: PetscReal lerror = 0.;
5071: PetscInt i;
5073: VecGetArrayRead(matis->x,&x);
5074: VecGetArrayRead(matis->y,&y);
5075: VecGetArrayRead(matis->counter,&v);
5076: for (i=0;i<local_size;i++)
5077: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5078: lerror = PetscAbsScalar(x[i]-y[i]);
5079: VecRestoreArrayRead(matis->x,&x);
5080: VecRestoreArrayRead(matis->y,&y);
5081: VecRestoreArrayRead(matis->counter,&v);
5082: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5083: if (error > PETSC_SMALL) {
5084: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5085: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5086: } else {
5087: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5088: }
5089: }
5090: }
5091: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5092: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5093: VecAXPY(x,-1.0,x_change);
5094: VecNorm(x,NORM_INFINITY,&error);
5095: if (error > PETSC_SMALL) {
5096: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5097: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5098: } else {
5099: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5100: }
5101: }
5102: VecDestroy(&x);
5103: VecDestroy(&x_change);
5104: }
5106: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5107: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5109: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5110: PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5111: if (isseqaij) {
5112: MatDestroy(&pcbddc->local_mat);
5113: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5114: if (lA) {
5115: Mat work;
5116: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5117: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5118: MatDestroy(&work);
5119: }
5120: } else {
5121: Mat work_mat;
5123: MatDestroy(&pcbddc->local_mat);
5124: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5125: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5126: MatDestroy(&work_mat);
5127: if (lA) {
5128: Mat work;
5129: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5130: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5131: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5132: MatDestroy(&work);
5133: }
5134: }
5135: if (matis->A->symmetric_set) {
5136: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5137: #if !defined(PETSC_USE_COMPLEX)
5138: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5139: #endif
5140: }
5141: MatDestroy(&new_mat);
5142: return(0);
5143: }
5145: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5146: {
5147: PC_IS* pcis = (PC_IS*)(pc->data);
5148: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5149: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5150: PetscInt *idx_R_local=NULL;
5151: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5152: PetscInt vbs,bs;
5153: PetscBT bitmask=NULL;
5154: PetscErrorCode ierr;
5157: /*
5158: No need to setup local scatters if
5159: - primal space is unchanged
5160: AND
5161: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5162: AND
5163: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
5164: */
5165: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5166: return(0);
5167: }
5168: /* destroy old objects */
5169: ISDestroy(&pcbddc->is_R_local);
5170: VecScatterDestroy(&pcbddc->R_to_B);
5171: VecScatterDestroy(&pcbddc->R_to_D);
5172: /* Set Non-overlapping dimensions */
5173: n_B = pcis->n_B;
5174: n_D = pcis->n - n_B;
5175: n_vertices = pcbddc->n_vertices;
5177: /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
5179: /* create auxiliary bitmask and allocate workspace */
5180: if (!sub_schurs || !sub_schurs->reuse_solver) {
5181: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5182: PetscBTCreate(pcis->n,&bitmask);
5183: for (i=0;i<n_vertices;i++) {
5184: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5185: }
5187: for (i=0, n_R=0; i<pcis->n; i++) {
5188: if (!PetscBTLookup(bitmask,i)) {
5189: idx_R_local[n_R++] = i;
5190: }
5191: }
5192: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5193: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5195: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5196: ISGetLocalSize(reuse_solver->is_R,&n_R);
5197: }
5199: /* Block code */
5200: vbs = 1;
5201: MatGetBlockSize(pcbddc->local_mat,&bs);
5202: if (bs>1 && !(n_vertices%bs)) {
5203: PetscBool is_blocked = PETSC_TRUE;
5204: PetscInt *vary;
5205: if (!sub_schurs || !sub_schurs->reuse_solver) {
5206: PetscMalloc1(pcis->n/bs,&vary);
5207: PetscArrayzero(vary,pcis->n/bs);
5208: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5209: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5210: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5211: for (i=0; i<pcis->n/bs; i++) {
5212: if (vary[i]!=0 && vary[i]!=bs) {
5213: is_blocked = PETSC_FALSE;
5214: break;
5215: }
5216: }
5217: PetscFree(vary);
5218: } else {
5219: /* Verify directly the R set */
5220: for (i=0; i<n_R/bs; i++) {
5221: PetscInt j,node=idx_R_local[bs*i];
5222: for (j=1; j<bs; j++) {
5223: if (node != idx_R_local[bs*i+j]-j) {
5224: is_blocked = PETSC_FALSE;
5225: break;
5226: }
5227: }
5228: }
5229: }
5230: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5231: vbs = bs;
5232: for (i=0;i<n_R/vbs;i++) {
5233: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5234: }
5235: }
5236: }
5237: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5238: if (sub_schurs && sub_schurs->reuse_solver) {
5239: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5241: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5242: ISDestroy(&reuse_solver->is_R);
5243: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5244: reuse_solver->is_R = pcbddc->is_R_local;
5245: } else {
5246: PetscFree(idx_R_local);
5247: }
5249: /* print some info if requested */
5250: if (pcbddc->dbg_flag) {
5251: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5252: PetscViewerFlush(pcbddc->dbg_viewer);
5253: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5254: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5255: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5256: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5257: PetscViewerFlush(pcbddc->dbg_viewer);
5258: }
5260: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5261: if (!sub_schurs || !sub_schurs->reuse_solver) {
5262: IS is_aux1,is_aux2;
5263: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5265: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5266: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5267: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5268: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5269: for (i=0; i<n_D; i++) {
5270: PetscBTSet(bitmask,is_indices[i]);
5271: }
5272: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5273: for (i=0, j=0; i<n_R; i++) {
5274: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5275: aux_array1[j++] = i;
5276: }
5277: }
5278: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5279: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5280: for (i=0, j=0; i<n_B; i++) {
5281: if (!PetscBTLookup(bitmask,is_indices[i])) {
5282: aux_array2[j++] = i;
5283: }
5284: }
5285: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5286: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5287: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5288: ISDestroy(&is_aux1);
5289: ISDestroy(&is_aux2);
5291: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5292: PetscMalloc1(n_D,&aux_array1);
5293: for (i=0, j=0; i<n_R; i++) {
5294: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5295: aux_array1[j++] = i;
5296: }
5297: }
5298: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5299: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5300: ISDestroy(&is_aux1);
5301: }
5302: PetscBTDestroy(&bitmask);
5303: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5304: } else {
5305: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5306: IS tis;
5307: PetscInt schur_size;
5309: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5310: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5311: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5312: ISDestroy(&tis);
5313: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5314: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5315: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5316: ISDestroy(&tis);
5317: }
5318: }
5319: return(0);
5320: }
5322: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5323: {
5324: MatNullSpace NullSpace;
5325: Mat dmat;
5326: const Vec *nullvecs;
5327: Vec v,v2,*nullvecs2;
5328: VecScatter sct = NULL;
5329: PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs;
5330: PetscBool nnsp_has_cnst;
5334: if (!is && !B) { /* MATIS */
5335: Mat_IS* matis = (Mat_IS*)A->data;
5337: if (!B) {
5338: MatISGetLocalMat(A,&B);
5339: }
5340: sct = matis->cctx;
5341: PetscObjectReference((PetscObject)sct);
5342: } else {
5343: MatGetNullSpace(B,&NullSpace);
5344: if (!NullSpace) {
5345: MatGetNearNullSpace(B,&NullSpace);
5346: }
5347: if (NullSpace) return(0);
5348: }
5349: MatGetNullSpace(A,&NullSpace);
5350: if (!NullSpace) {
5351: MatGetNearNullSpace(A,&NullSpace);
5352: }
5353: if (!NullSpace) return(0);
5355: MatCreateVecs(A,&v,NULL);
5356: MatCreateVecs(B,&v2,NULL);
5357: if (!sct) {
5358: VecScatterCreate(v,is,v2,NULL,&sct);
5359: }
5360: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5361: bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5362: PetscMalloc1(bsiz,&nullvecs2);
5363: VecGetBlockSize(v2,&bs);
5364: VecGetSize(v2,&N);
5365: VecGetLocalSize(v2,&n);
5366: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz,NULL,&dmat);
5367: for (k=0;k<nnsp_size;k++) {
5368: PetscScalar *arr;
5370: MatDenseGetColumn(dmat,k,&arr);
5371: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[k]);
5372: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5373: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5374: MatDenseRestoreColumn(dmat,&arr);
5375: }
5376: if (nnsp_has_cnst) {
5377: PetscScalar *arr;
5379: MatDenseGetColumn(dmat,nnsp_size,&arr);
5380: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[nnsp_size]);
5381: VecSet(nullvecs2[nnsp_size],1.0);
5382: MatDenseRestoreColumn(dmat,&arr);
5383: }
5384: PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5385: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);
5386: if (bsiz2 != bsiz) {
5387: Mat dmat2;
5388: IS r,c;
5389: PetscInt rst,ren;
5391: MatGetOwnershipRange(dmat,&rst,&ren);
5392: ISCreateStride(PetscObjectComm((PetscObject)B),ren-rst,rst,1,&r);
5393: ISCreateStride(PetscObjectComm((PetscObject)B),0,bsiz2,1,&c);
5394: MatCreateSubMatrix(dmat,r,c,MAT_INITIAL_MATRIX,&dmat2);
5395: MatDestroy(&dmat);
5396: ISDestroy(&r);
5397: ISDestroy(&c);
5398: dmat = dmat2;
5399: }
5400: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5401: MatDestroy(&dmat);
5402: for (k=0;k<bsiz;k++) {
5403: VecDestroy(&nullvecs2[k]);
5404: }
5405: PetscFree(nullvecs2);
5406: MatSetNearNullSpace(B,NullSpace);
5407: MatNullSpaceDestroy(&NullSpace);
5408: VecDestroy(&v);
5409: VecDestroy(&v2);
5410: VecScatterDestroy(&sct);
5411: return(0);
5412: }
/*
   PCBDDCSetUpLocalSolvers - Creates (if needed) and sets up the subdomain solvers pcbddc->ksp_D and pcbddc->ksp_R.

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - if true, set up pcbddc->ksp_D with operators (pcis->A_II, pcis->pA_II)
-  neumann   - if true, set up pcbddc->ksp_R on A_RR, i.e. the submatrix of pcbddc->local_mat indexed by
               pcbddc->is_R_local (or the operator of the reused correction solver)

   Notes:
   Newly created solvers default to KSPPREONLY with PCCHOLESKY when the matrix type is MATSEQSBAIJ and
   PCLU otherwise; KSPSetFromOptions() is called only the first time a solver is created.
   The options prefixes are the pc prefix followed by "pc_bddc_dirichlet_" / "pc_bddc_neumann_"
   (plus "l<level>_" on levels greater than zero).
   When pcbddc->dbg_flag is set, each requested solver is tested on a random vector and the infinity
   norm of the error is printed on pcbddc->dbg_viewer.
   NOTE(review): the return codes of the PETSc calls are not checked in this listing.
*/
5414: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5415: {
5416: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5417: PC_IS *pcis = (PC_IS*)pc->data;
5418: PC pc_temp;
5419: Mat A_RR;
5420: MatNullSpace nnsp;
5421: MatReuse reuse;
5422: PetscScalar m_one = -1.0;
5423: PetscReal value;
5424: PetscInt n_D,n_R;
5425: PetscBool issbaij,opts;
5427: void (*f)(void) = 0;
5428: char dir_prefix[256],neu_prefix[256],str_level[16];
5429: size_t len;
5432: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5433: /* approximate solver, propagate NearNullSpace if needed */
/* only on the first setup, and only if no process already has a near null space on its local matrix */
5434: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5435: MatNullSpace gnnsp1,gnnsp2;
5436: PetscBool lhas,ghas;
5438: MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5439: MatGetNearNullSpace(pc->pmat,&gnnsp1);
5440: MatGetNullSpace(pc->pmat,&gnnsp2);
5441: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5442: MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
5443: if (!ghas && (gnnsp1 || gnnsp2)) {
5444: MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5445: }
5446: }
5448: /* compute prefixes */
5449: PetscStrcpy(dir_prefix,"");
5450: PetscStrcpy(neu_prefix,"");
5451: if (!pcbddc->current_level) {
5452: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5453: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5454: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5455: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5456: } else {
/* NOTE(review): len is a size_t; the subtractions below assume the pc prefix ends with the strings being removed */
5457: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5458: PetscStrlen(((PetscObject)pc)->prefix,&len);
5459: len -= 15; /* remove "pc_bddc_coarse_" */
5460: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5461: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5462: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5463: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5464: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5465: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5466: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5467: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5468: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5469: }
5471: /* DIRICHLET PROBLEM */
5472: if (dirichlet) {
5473: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5474: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5475: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5476: if (pcbddc->dbg_flag) {
5477: Mat A_IIn;
/* replace pcis->A_II with the matrix returned by PCBDDCBenignProject() */
5479: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5480: MatDestroy(&pcis->A_II);
5481: pcis->A_II = A_IIn;
5482: }
5483: }
5484: if (pcbddc->local_mat->symmetric_set) {
5485: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5486: }
5487: /* Matrix for Dirichlet problem is pcis->A_II */
5488: n_D = pcis->n - pcis->n_B;
5489: opts = PETSC_FALSE;
5490: if (!pcbddc->ksp_D) { /* create object if not yet build */
5491: opts = PETSC_TRUE;
5492: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5493: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5494: /* default */
5495: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5496: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5497: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5498: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5499: if (issbaij) {
5500: PCSetType(pc_temp,PCCHOLESKY);
5501: } else {
5502: PCSetType(pc_temp,PCLU);
5503: }
5504: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5505: }
5506: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5507: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5508: /* Allow user's customization */
5509: if (opts) {
5510: KSPSetFromOptions(pcbddc->ksp_D);
5511: }
5512: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5513: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5514: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5515: }
/* query again: the propagation above may have attached a near null space */
5516: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5517: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5518: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
/* if the inner PC accepts coordinates and no near null space is available, pass the coordinates of the is_I_local dofs */
5519: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5520: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5521: const PetscInt *idxs;
5522: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5524: ISGetLocalSize(pcis->is_I_local,&nl);
5525: ISGetIndices(pcis->is_I_local,&idxs);
5526: PetscMalloc1(nl*cdim,&scoords);
5527: for (i=0;i<nl;i++) {
5528: for (d=0;d<cdim;d++) {
5529: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5530: }
5531: }
5532: ISRestoreIndices(pcis->is_I_local,&idxs);
5533: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5534: PetscFree(scoords);
5535: }
5536: if (sub_schurs && sub_schurs->reuse_solver) {
5537: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5539: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5540: }
5542: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5543: if (!n_D) {
5544: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5545: PCSetType(pc_temp,PCNONE);
5546: }
5547: KSPSetUp(pcbddc->ksp_D);
5548: /* set ksp_D into pcis data */
5549: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5550: KSPDestroy(&pcis->ksp_D);
5551: pcis->ksp_D = pcbddc->ksp_D;
5552: }
5554: /* NEUMANN PROBLEM */
5555: A_RR = 0;
5556: if (neumann) {
5557: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5558: PetscInt ibs,mbs;
5559: PetscBool issbaij, reuse_neumann_solver;
5560: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
/* the correction solver is reused only if a reuse solver exists and no "__KSPFETIDP_iP" object is composed with sub_schurs->A */
5562: reuse_neumann_solver = PETSC_FALSE;
5563: if (sub_schurs && sub_schurs->reuse_solver) {
5564: IS iP;
5566: reuse_neumann_solver = PETSC_TRUE;
5567: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5568: if (iP) reuse_neumann_solver = PETSC_FALSE;
5569: }
5570: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5571: ISGetSize(pcbddc->is_R_local,&n_R);
5572: if (pcbddc->ksp_R) { /* already created ksp */
5573: PetscInt nn_R;
/* take an extra reference on the current operator; it is released by MatDestroy() below or at the end of the function */
5574: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5575: PetscObjectReference((PetscObject)A_RR);
5576: MatGetSize(A_RR,&nn_R,NULL);
5577: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5578: KSPReset(pcbddc->ksp_R);
5579: MatDestroy(&A_RR);
5580: reuse = MAT_INITIAL_MATRIX;
5581: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5582: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5583: MatDestroy(&A_RR);
5584: reuse = MAT_INITIAL_MATRIX;
5585: } else { /* safe to reuse the matrix */
5586: reuse = MAT_REUSE_MATRIX;
5587: }
5588: }
5589: /* last check */
5590: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5591: MatDestroy(&A_RR);
5592: reuse = MAT_INITIAL_MATRIX;
5593: }
5594: } else { /* first time, so we need to create the matrix */
5595: reuse = MAT_INITIAL_MATRIX;
5596: }
5597: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5598: MatGetBlockSize(pcbddc->local_mat,&mbs);
5599: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5600: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
/* when local_mat is the same object as matis->A, convert into a new matrix instead of in place */
5601: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5602: if (matis->A == pcbddc->local_mat) {
5603: MatDestroy(&pcbddc->local_mat);
5604: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5605: } else {
5606: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5607: }
5608: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5609: if (matis->A == pcbddc->local_mat) {
5610: MatDestroy(&pcbddc->local_mat);
5611: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5612: } else {
5613: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5614: }
5615: }
5616: /* extract A_RR */
5617: if (reuse_neumann_solver) {
5618: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5620: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5621: MatDestroy(&A_RR);
5622: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5623: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5624: } else {
5625: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5626: }
5627: } else {
/* use the operator already attached to the reused correction solver */
5628: MatDestroy(&A_RR);
5629: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5630: PetscObjectReference((PetscObject)A_RR);
5631: }
5632: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5633: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5634: }
5635: if (pcbddc->local_mat->symmetric_set) {
5636: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5637: }
5638: opts = PETSC_FALSE;
5639: if (!pcbddc->ksp_R) { /* create object if not present */
5640: opts = PETSC_TRUE;
5641: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5642: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5643: /* default */
5644: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5645: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5646: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5647: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5648: if (issbaij) {
5649: PCSetType(pc_temp,PCCHOLESKY);
5650: } else {
5651: PCSetType(pc_temp,PCLU);
5652: }
5653: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5654: }
5655: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5656: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5657: if (opts) { /* Allow user's customization once */
5658: KSPSetFromOptions(pcbddc->ksp_R);
5659: }
5660: MatGetNearNullSpace(A_RR,&nnsp);
5661: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5662: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5663: }
/* query again: the propagation above may have attached a near null space */
5664: MatGetNearNullSpace(A_RR,&nnsp);
5665: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5666: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
/* same coordinate hand-off as for the Dirichlet solver, restricted to the is_R_local dofs */
5667: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5668: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5669: const PetscInt *idxs;
5670: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5672: ISGetLocalSize(pcbddc->is_R_local,&nl);
5673: ISGetIndices(pcbddc->is_R_local,&idxs);
5674: PetscMalloc1(nl*cdim,&scoords);
5675: for (i=0;i<nl;i++) {
5676: for (d=0;d<cdim;d++) {
5677: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5678: }
5679: }
5680: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5681: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5682: PetscFree(scoords);
5683: }
5685: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5686: if (!n_R) {
5687: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5688: PCSetType(pc_temp,PCNONE);
5689: }
5690: /* Reuse solver if it is present */
5691: if (reuse_neumann_solver) {
5692: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5694: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5695: }
5696: KSPSetUp(pcbddc->ksp_R);
5697: }
5699: if (pcbddc->dbg_flag) {
5700: PetscViewerFlush(pcbddc->dbg_viewer);
5701: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5702: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5703: }
5704: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5706: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5707: if (pcbddc->NullSpace_corr[0]) {
5708: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5709: }
5710: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5711: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5712: }
5713: if (neumann && pcbddc->NullSpace_corr[2]) {
5714: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5715: }
5716: /* check Dirichlet and Neumann solvers */
5717: if (pcbddc->dbg_flag) {
5718: if (dirichlet) { /* Dirichlet */
/* solve with right-hand side A_II*r for a random r and report the infinity norm of r minus the computed solution */
5719: VecSetRandom(pcis->vec1_D,NULL);
5720: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5721: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5722: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5723: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5724: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5725: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5726: PetscViewerFlush(pcbddc->dbg_viewer);
5727: }
5728: if (neumann) { /* Neumann */
/* same test with A_RR and ksp_R */
5729: VecSetRandom(pcbddc->vec1_R,NULL);
5730: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5731: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5732: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5733: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5734: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5735: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5736: PetscViewerFlush(pcbddc->dbg_viewer);
5737: }
5738: }
5739: /* free Neumann problem's matrix */
5740: MatDestroy(&A_RR);
5741: return(0);
5742: }
5744: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5745: {
5746: PetscErrorCode ierr;
5747: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5748: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5749: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
5752: if (!reuse_solver) {
5753: VecSet(pcbddc->vec1_R,0.);
5754: }
5755: if (!pcbddc->switch_static) {
5756: if (applytranspose && pcbddc->local_auxmat1) {
5757: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5758: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5759: }
5760: if (!reuse_solver) {
5761: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5762: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5763: } else {
5764: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5766: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5767: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5768: }
5769: } else {
5770: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5771: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5772: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5773: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5774: if (applytranspose && pcbddc->local_auxmat1) {
5775: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5776: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5777: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5778: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5779: }
5780: }
5781: if (!reuse_solver || pcbddc->switch_static) {
5782: if (applytranspose) {
5783: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5784: } else {
5785: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5786: }
5787: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5788: } else {
5789: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5791: if (applytranspose) {
5792: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5793: } else {
5794: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5795: }
5796: }
5797: VecSet(inout_B,0.);
5798: if (!pcbddc->switch_static) {
5799: if (!reuse_solver) {
5800: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5801: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5802: } else {
5803: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5805: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5806: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5807: }
5808: if (!applytranspose && pcbddc->local_auxmat1) {
5809: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5810: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5811: }
5812: } else {
5813: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5814: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5815: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5816: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5817: if (!applytranspose && pcbddc->local_auxmat1) {
5818: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5819: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5820: }
5821: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5822: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5823: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5824: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5825: }
5826: return(0);
5827: }
5829: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5830: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5831: {
5833: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5834: PC_IS* pcis = (PC_IS*) (pc->data);
5835: const PetscScalar zero = 0.0;
5838: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5839: if (!pcbddc->benign_apply_coarse_only) {
5840: if (applytranspose) {
5841: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5842: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5843: } else {
5844: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5845: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5846: }
5847: } else {
5848: VecSet(pcbddc->vec1_P,zero);
5849: }
5851: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5852: if (pcbddc->benign_n) {
5853: PetscScalar *array;
5854: PetscInt j;
5856: VecGetArray(pcbddc->vec1_P,&array);
5857: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5858: VecRestoreArray(pcbddc->vec1_P,&array);
5859: }
5861: /* start communications from local primal nodes to rhs of coarse solver */
5862: VecSet(pcbddc->coarse_vec,zero);
5863: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5864: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5866: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5867: if (pcbddc->coarse_ksp) {
5868: Mat coarse_mat;
5869: Vec rhs,sol;
5870: MatNullSpace nullsp;
5871: PetscBool isbddc = PETSC_FALSE;
5873: if (pcbddc->benign_have_null) {
5874: PC coarse_pc;
5876: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5877: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5878: /* we need to propagate to coarser levels the need for a possible benign correction */
5879: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5880: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5881: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5882: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5883: }
5884: }
5885: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5886: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5887: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5888: if (applytranspose) {
5889: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5890: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5891: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5892: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5893: if (nullsp) {
5894: MatNullSpaceRemove(nullsp,sol);
5895: }
5896: } else {
5897: MatGetNullSpace(coarse_mat,&nullsp);
5898: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5899: PC coarse_pc;
5901: if (nullsp) {
5902: MatNullSpaceRemove(nullsp,rhs);
5903: }
5904: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5905: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5906: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5907: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5908: } else {
5909: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5910: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5911: if (nullsp) {
5912: MatNullSpaceRemove(nullsp,sol);
5913: }
5914: }
5915: }
5916: /* we don't need the benign correction at coarser levels anymore */
5917: if (pcbddc->benign_have_null && isbddc) {
5918: PC coarse_pc;
5919: PC_BDDC* coarsepcbddc;
5921: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5922: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5923: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5924: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5925: }
5926: }
5928: /* Local solution on R nodes */
5929: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5930: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5931: }
5932: /* communications from coarse sol to local primal nodes */
5933: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5934: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5936: /* Sum contributions from the two levels */
5937: if (!pcbddc->benign_apply_coarse_only) {
5938: if (applytranspose) {
5939: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5940: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5941: } else {
5942: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5943: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5944: }
5945: /* store p0 */
5946: if (pcbddc->benign_n) {
5947: PetscScalar *array;
5948: PetscInt j;
5950: VecGetArray(pcbddc->vec1_P,&array);
5951: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5952: VecRestoreArray(pcbddc->vec1_P,&array);
5953: }
5954: } else { /* expand the coarse solution */
5955: if (applytranspose) {
5956: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5957: } else {
5958: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5959: }
5960: }
5961: return(0);
5962: }
5964: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5965: {
5966: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5967: Vec from,to;
5968: const PetscScalar *array;
5969: PetscErrorCode ierr;
5972: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5973: from = pcbddc->coarse_vec;
5974: to = pcbddc->vec1_P;
5975: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5976: Vec tvec;
5978: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5979: VecResetArray(tvec);
5980: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5981: VecGetArrayRead(tvec,&array);
5982: VecPlaceArray(from,array);
5983: VecRestoreArrayRead(tvec,&array);
5984: }
5985: } else { /* from local to global -> put data in coarse right hand side */
5986: from = pcbddc->vec1_P;
5987: to = pcbddc->coarse_vec;
5988: }
5989: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5990: return(0);
5991: }
5993: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5994: {
5995: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5996: Vec from,to;
5997: const PetscScalar *array;
5998: PetscErrorCode ierr;
6001: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6002: from = pcbddc->coarse_vec;
6003: to = pcbddc->vec1_P;
6004: } else { /* from local to global -> put data in coarse right hand side */
6005: from = pcbddc->vec1_P;
6006: to = pcbddc->coarse_vec;
6007: }
6008: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6009: if (smode == SCATTER_FORWARD) {
6010: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6011: Vec tvec;
6013: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6014: VecGetArrayRead(to,&array);
6015: VecPlaceArray(tvec,array);
6016: VecRestoreArrayRead(to,&array);
6017: }
6018: } else {
6019: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6020: VecResetArray(from);
6021: }
6022: }
6023: return(0);
6024: }
6026: /* uncomment for testing purposes */
6027: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
6028: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6029: {
6030: PetscErrorCode ierr;
6031: PC_IS* pcis = (PC_IS*)(pc->data);
6032: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
6033: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
6034: /* one and zero */
6035: PetscScalar one=1.0,zero=0.0;
6036: /* space to store constraints and their local indices */
6037: PetscScalar *constraints_data;
6038: PetscInt *constraints_idxs,*constraints_idxs_B;
6039: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
6040: PetscInt *constraints_n;
6041: /* iterators */
6042: PetscInt i,j,k,total_counts,total_counts_cc,cum;
6043: /* BLAS integers */
6044: PetscBLASInt lwork,lierr;
6045: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
6046: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
6047: /* reuse */
6048: PetscInt olocal_primal_size,olocal_primal_size_cc;
6049: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
6050: /* change of basis */
6051: PetscBool qr_needed;
6052: PetscBT change_basis,qr_needed_idx;
6053: /* auxiliary stuff */
6054: PetscInt *nnz,*is_indices;
6055: PetscInt ncc;
6056: /* some quantities */
6057: PetscInt n_vertices,total_primal_vertices,valid_constraints;
6058: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6059: PetscReal tol; /* tolerance for retaining eigenmodes */
6062: tol = PetscSqrtReal(PETSC_SMALL);
6063: /* Destroy Mat objects computed previously */
6064: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6065: MatDestroy(&pcbddc->ConstraintMatrix);
6066: MatDestroy(&pcbddc->switch_static_change);
6067: /* save info on constraints from previous setup (if any) */
6068: olocal_primal_size = pcbddc->local_primal_size;
6069: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6070: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6071: PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6072: PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6073: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6074: PetscFree(pcbddc->primal_indices_local_idxs);
6076: if (!pcbddc->adaptive_selection) {
6077: IS ISForVertices,*ISForFaces,*ISForEdges;
6078: MatNullSpace nearnullsp;
6079: const Vec *nearnullvecs;
6080: Vec *localnearnullsp;
6081: PetscScalar *array;
6082: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
6083: PetscBool nnsp_has_cnst;
6084: /* LAPACK working arrays for SVD or POD */
6085: PetscBool skip_lapack,boolforchange;
6086: PetscScalar *work;
6087: PetscReal *singular_vals;
6088: #if defined(PETSC_USE_COMPLEX)
6089: PetscReal *rwork;
6090: #endif
6091: #if defined(PETSC_MISSING_LAPACK_GESVD)
6092: PetscScalar *temp_basis,*correlation_mat;
6093: #else
6094: PetscBLASInt dummy_int=1;
6095: PetscScalar dummy_scalar=1.;
6096: #endif
6098: /* Get index sets for faces, edges and vertices from graph */
6099: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6100: /* print some info */
6101: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6102: PetscInt nv;
6104: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6105: ISGetSize(ISForVertices,&nv);
6106: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6107: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6108: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6109: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6110: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6111: PetscViewerFlush(pcbddc->dbg_viewer);
6112: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6113: }
6115: /* free unneeded index sets */
6116: if (!pcbddc->use_vertices) {
6117: ISDestroy(&ISForVertices);
6118: }
6119: if (!pcbddc->use_edges) {
6120: for (i=0;i<n_ISForEdges;i++) {
6121: ISDestroy(&ISForEdges[i]);
6122: }
6123: PetscFree(ISForEdges);
6124: n_ISForEdges = 0;
6125: }
6126: if (!pcbddc->use_faces) {
6127: for (i=0;i<n_ISForFaces;i++) {
6128: ISDestroy(&ISForFaces[i]);
6129: }
6130: PetscFree(ISForFaces);
6131: n_ISForFaces = 0;
6132: }
6134: /* check if near null space is attached to global mat */
6135: if (pcbddc->use_nnsp) {
6136: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6137: } else nearnullsp = NULL;
6139: if (nearnullsp) {
6140: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6141: /* remove any stored info */
6142: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6143: PetscFree(pcbddc->onearnullvecs_state);
6144: /* store information for BDDC solver reuse */
6145: PetscObjectReference((PetscObject)nearnullsp);
6146: pcbddc->onearnullspace = nearnullsp;
6147: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6148: for (i=0;i<nnsp_size;i++) {
6149: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6150: }
6151: } else { /* if near null space is not provided BDDC uses constants by default */
6152: nnsp_size = 0;
6153: nnsp_has_cnst = PETSC_TRUE;
6154: }
6155: /* get max number of constraints on a single cc */
6156: max_constraints = nnsp_size;
6157: if (nnsp_has_cnst) max_constraints++;
6159: /*
6160: Evaluate maximum storage size needed by the procedure
6161: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6162: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6163: There can be multiple constraints per connected component
6164: */
6165: n_vertices = 0;
6166: if (ISForVertices) {
6167: ISGetSize(ISForVertices,&n_vertices);
6168: }
6169: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6170: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6172: total_counts = n_ISForFaces+n_ISForEdges;
6173: total_counts *= max_constraints;
6174: total_counts += n_vertices;
6175: PetscBTCreate(total_counts,&change_basis);
6177: total_counts = 0;
6178: max_size_of_constraint = 0;
6179: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6180: IS used_is;
6181: if (i<n_ISForEdges) {
6182: used_is = ISForEdges[i];
6183: } else {
6184: used_is = ISForFaces[i-n_ISForEdges];
6185: }
6186: ISGetSize(used_is,&j);
6187: total_counts += j;
6188: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6189: }
6190: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6192: /* get local part of global near null space vectors */
6193: PetscMalloc1(nnsp_size,&localnearnullsp);
6194: for (k=0;k<nnsp_size;k++) {
6195: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6196: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6197: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6198: }
6200: /* whether or not to skip lapack calls */
6201: skip_lapack = PETSC_TRUE;
6202: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6204: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6205: if (!skip_lapack) {
6206: PetscScalar temp_work;
6208: #if defined(PETSC_MISSING_LAPACK_GESVD)
6209: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6210: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6211: PetscMalloc1(max_constraints,&singular_vals);
6212: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6213: #if defined(PETSC_USE_COMPLEX)
6214: PetscMalloc1(3*max_constraints,&rwork);
6215: #endif
6216: /* now we evaluate the optimal workspace using query with lwork=-1 */
6217: PetscBLASIntCast(max_constraints,&Blas_N);
6218: PetscBLASIntCast(max_constraints,&Blas_LDA);
6219: lwork = -1;
6220: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6221: #if !defined(PETSC_USE_COMPLEX)
6222: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6223: #else
6224: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6225: #endif
6226: PetscFPTrapPop();
6227: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6228: #else /* on missing GESVD */
6229: /* SVD */
6230: PetscInt max_n,min_n;
6231: max_n = max_size_of_constraint;
6232: min_n = max_constraints;
6233: if (max_size_of_constraint < max_constraints) {
6234: min_n = max_size_of_constraint;
6235: max_n = max_constraints;
6236: }
6237: PetscMalloc1(min_n,&singular_vals);
6238: #if defined(PETSC_USE_COMPLEX)
6239: PetscMalloc1(5*min_n,&rwork);
6240: #endif
6241: /* now we evaluate the optimal workspace using query with lwork=-1 */
6242: lwork = -1;
6243: PetscBLASIntCast(max_n,&Blas_M);
6244: PetscBLASIntCast(min_n,&Blas_N);
6245: PetscBLASIntCast(max_n,&Blas_LDA);
6246: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6247: #if !defined(PETSC_USE_COMPLEX)
6248: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6249: #else
6250: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6251: #endif
6252: PetscFPTrapPop();
6253: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6254: #endif /* on missing GESVD */
6255: /* Allocate optimal workspace */
6256: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6257: PetscMalloc1(lwork,&work);
6258: }
6259: /* Now we can loop on constraining sets */
6260: total_counts = 0;
6261: constraints_idxs_ptr[0] = 0;
6262: constraints_data_ptr[0] = 0;
6263: /* vertices */
6264: if (n_vertices) {
6265: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6266: PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6267: for (i=0;i<n_vertices;i++) {
6268: constraints_n[total_counts] = 1;
6269: constraints_data[total_counts] = 1.0;
6270: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6271: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6272: total_counts++;
6273: }
6274: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6275: n_vertices = total_counts;
6276: }
6278: /* edges and faces */
6279: total_counts_cc = total_counts;
6280: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6281: IS used_is;
6282: PetscBool idxs_copied = PETSC_FALSE;
6284: if (ncc<n_ISForEdges) {
6285: used_is = ISForEdges[ncc];
6286: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6287: } else {
6288: used_is = ISForFaces[ncc-n_ISForEdges];
6289: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6290: }
6291: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6293: ISGetSize(used_is,&size_of_constraint);
6294: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6295: /* change of basis should not be performed on local periodic nodes */
6296: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6297: if (nnsp_has_cnst) {
6298: PetscScalar quad_value;
6300: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6301: idxs_copied = PETSC_TRUE;
6303: if (!pcbddc->use_nnsp_true) {
6304: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6305: } else {
6306: quad_value = 1.0;
6307: }
6308: for (j=0;j<size_of_constraint;j++) {
6309: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6310: }
6311: temp_constraints++;
6312: total_counts++;
6313: }
6314: for (k=0;k<nnsp_size;k++) {
6315: PetscReal real_value;
6316: PetscScalar *ptr_to_data;
6318: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6319: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6320: for (j=0;j<size_of_constraint;j++) {
6321: ptr_to_data[j] = array[is_indices[j]];
6322: }
6323: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6324: /* check if array is null on the connected component */
6325: PetscBLASIntCast(size_of_constraint,&Blas_N);
6326: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6327: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6328: temp_constraints++;
6329: total_counts++;
6330: if (!idxs_copied) {
6331: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6332: idxs_copied = PETSC_TRUE;
6333: }
6334: }
6335: }
6336: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6337: valid_constraints = temp_constraints;
6338: if (!pcbddc->use_nnsp_true && temp_constraints) {
6339: if (temp_constraints == 1) { /* just normalize the constraint */
6340: PetscScalar norm,*ptr_to_data;
6342: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6343: PetscBLASIntCast(size_of_constraint,&Blas_N);
6344: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6345: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6346: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6347: } else { /* perform SVD */
6348: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6350: #if defined(PETSC_MISSING_LAPACK_GESVD)
6351: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6352: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6353: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6354: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6355: from that computed using LAPACKgesvd
6356: -> This is due to a different computation of eigenvectors in LAPACKheev
6357: -> The quality of the POD-computed basis will be the same */
6358: PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6359: /* Store upper triangular part of correlation matrix */
6360: PetscBLASIntCast(size_of_constraint,&Blas_N);
6361: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6362: for (j=0;j<temp_constraints;j++) {
6363: for (k=0;k<j+1;k++) {
6364: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6365: }
6366: }
6367: /* compute eigenvalues and eigenvectors of correlation matrix */
6368: PetscBLASIntCast(temp_constraints,&Blas_N);
6369: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6370: #if !defined(PETSC_USE_COMPLEX)
6371: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6372: #else
6373: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6374: #endif
6375: PetscFPTrapPop();
6376: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6377: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6378: j = 0;
6379: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6380: total_counts = total_counts-j;
6381: valid_constraints = temp_constraints-j;
6382: /* scale and copy POD basis into used quadrature memory */
6383: PetscBLASIntCast(size_of_constraint,&Blas_M);
6384: PetscBLASIntCast(temp_constraints,&Blas_N);
6385: PetscBLASIntCast(temp_constraints,&Blas_K);
6386: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6387: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6388: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6389: if (j<temp_constraints) {
6390: PetscInt ii;
6391: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6392: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6393: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6394: PetscFPTrapPop();
6395: for (k=0;k<temp_constraints-j;k++) {
6396: for (ii=0;ii<size_of_constraint;ii++) {
6397: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6398: }
6399: }
6400: }
6401: #else /* on missing GESVD */
6402: PetscBLASIntCast(size_of_constraint,&Blas_M);
6403: PetscBLASIntCast(temp_constraints,&Blas_N);
6404: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6405: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6406: #if !defined(PETSC_USE_COMPLEX)
6407: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6408: #else
6409: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6410: #endif
6411: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6412: PetscFPTrapPop();
6413: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6414: k = temp_constraints;
6415: if (k > size_of_constraint) k = size_of_constraint;
6416: j = 0;
6417: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6418: valid_constraints = k-j;
6419: total_counts = total_counts-temp_constraints+valid_constraints;
6420: #endif /* on missing GESVD */
6421: }
6422: }
6423: /* update pointers information */
6424: if (valid_constraints) {
6425: constraints_n[total_counts_cc] = valid_constraints;
6426: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6427: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6428: /* set change_of_basis flag */
6429: if (boolforchange) {
6430: PetscBTSet(change_basis,total_counts_cc);
6431: }
6432: total_counts_cc++;
6433: }
6434: }
6435: /* free workspace */
6436: if (!skip_lapack) {
6437: PetscFree(work);
6438: #if defined(PETSC_USE_COMPLEX)
6439: PetscFree(rwork);
6440: #endif
6441: PetscFree(singular_vals);
6442: #if defined(PETSC_MISSING_LAPACK_GESVD)
6443: PetscFree(correlation_mat);
6444: PetscFree(temp_basis);
6445: #endif
6446: }
6447: for (k=0;k<nnsp_size;k++) {
6448: VecDestroy(&localnearnullsp[k]);
6449: }
6450: PetscFree(localnearnullsp);
6451: /* free index sets of faces, edges and vertices */
6452: for (i=0;i<n_ISForFaces;i++) {
6453: ISDestroy(&ISForFaces[i]);
6454: }
6455: if (n_ISForFaces) {
6456: PetscFree(ISForFaces);
6457: }
6458: for (i=0;i<n_ISForEdges;i++) {
6459: ISDestroy(&ISForEdges[i]);
6460: }
6461: if (n_ISForEdges) {
6462: PetscFree(ISForEdges);
6463: }
6464: ISDestroy(&ISForVertices);
6465: } else {
6466: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6468: total_counts = 0;
6469: n_vertices = 0;
6470: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6471: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6472: }
6473: max_constraints = 0;
6474: total_counts_cc = 0;
6475: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6476: total_counts += pcbddc->adaptive_constraints_n[i];
6477: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6478: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6479: }
6480: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6481: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6482: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6483: constraints_data = pcbddc->adaptive_constraints_data;
6484: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6485: PetscMalloc1(total_counts_cc,&constraints_n);
6486: total_counts_cc = 0;
6487: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6488: if (pcbddc->adaptive_constraints_n[i]) {
6489: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6490: }
6491: }
6493: max_size_of_constraint = 0;
6494: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6495: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6496: /* Change of basis */
6497: PetscBTCreate(total_counts_cc,&change_basis);
6498: if (pcbddc->use_change_of_basis) {
6499: for (i=0;i<sub_schurs->n_subs;i++) {
6500: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6501: PetscBTSet(change_basis,i+n_vertices);
6502: }
6503: }
6504: }
6505: }
6506: pcbddc->local_primal_size = total_counts;
6507: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6509: /* map constraints_idxs in boundary numbering */
6510: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6511: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6513: /* Create constraint matrix */
6514: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6515: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6516: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6518: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6519: /* determine if a QR strategy is needed for change of basis */
6520: qr_needed = pcbddc->use_qr_single;
6521: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6522: total_primal_vertices=0;
6523: pcbddc->local_primal_size_cc = 0;
6524: for (i=0;i<total_counts_cc;i++) {
6525: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6526: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6527: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6528: pcbddc->local_primal_size_cc += 1;
6529: } else if (PetscBTLookup(change_basis,i)) {
6530: for (k=0;k<constraints_n[i];k++) {
6531: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6532: }
6533: pcbddc->local_primal_size_cc += constraints_n[i];
6534: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6535: PetscBTSet(qr_needed_idx,i);
6536: qr_needed = PETSC_TRUE;
6537: }
6538: } else {
6539: pcbddc->local_primal_size_cc += 1;
6540: }
6541: }
6542: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6543: pcbddc->n_vertices = total_primal_vertices;
6544: /* permute indices in order to have a sorted set of vertices */
6545: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6546: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6547: PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6548: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6550: /* nonzero structure of constraint matrix */
6551: /* and get reference dof for local constraints */
6552: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6553: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6555: j = total_primal_vertices;
6556: total_counts = total_primal_vertices;
6557: cum = total_primal_vertices;
6558: for (i=n_vertices;i<total_counts_cc;i++) {
6559: if (!PetscBTLookup(change_basis,i)) {
6560: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6561: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6562: cum++;
6563: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6564: for (k=0;k<constraints_n[i];k++) {
6565: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6566: nnz[j+k] = size_of_constraint;
6567: }
6568: j += constraints_n[i];
6569: }
6570: }
6571: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6572: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6573: PetscFree(nnz);
6575: /* set values in constraint matrix */
6576: for (i=0;i<total_primal_vertices;i++) {
6577: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6578: }
6579: total_counts = total_primal_vertices;
6580: for (i=n_vertices;i<total_counts_cc;i++) {
6581: if (!PetscBTLookup(change_basis,i)) {
6582: PetscInt *cols;
6584: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6585: cols = constraints_idxs+constraints_idxs_ptr[i];
6586: for (k=0;k<constraints_n[i];k++) {
6587: PetscInt row = total_counts+k;
6588: PetscScalar *vals;
6590: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6591: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6592: }
6593: total_counts += constraints_n[i];
6594: }
6595: }
6596: /* assembling */
6597: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6598: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6599: MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");
6601: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6602: if (pcbddc->use_change_of_basis) {
6603: /* dual and primal dofs on a single cc */
6604: PetscInt dual_dofs,primal_dofs;
6605: /* working stuff for GEQRF */
6606: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6607: PetscBLASInt lqr_work;
6608: /* working stuff for UNGQR */
6609: PetscScalar *gqr_work = NULL,lgqr_work_t;
6610: PetscBLASInt lgqr_work;
6611: /* working stuff for TRTRS */
6612: PetscScalar *trs_rhs = NULL;
6613: PetscBLASInt Blas_NRHS;
6614: /* pointers for values insertion into change of basis matrix */
6615: PetscInt *start_rows,*start_cols;
6616: PetscScalar *start_vals;
6617: /* working stuff for values insertion */
6618: PetscBT is_primal;
6619: PetscInt *aux_primal_numbering_B;
6620: /* matrix sizes */
6621: PetscInt global_size,local_size;
6622: /* temporary change of basis */
6623: Mat localChangeOfBasisMatrix;
6624: /* extra space for debugging */
6625: PetscScalar *dbg_work = NULL;
6627: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6628: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6629: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6630: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6631: /* nonzeros for local mat */
6632: PetscMalloc1(pcis->n,&nnz);
6633: if (!pcbddc->benign_change || pcbddc->fake_change) {
6634: for (i=0;i<pcis->n;i++) nnz[i]=1;
6635: } else {
6636: const PetscInt *ii;
6637: PetscInt n;
6638: PetscBool flg_row;
6639: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6640: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6641: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6642: }
6643: for (i=n_vertices;i<total_counts_cc;i++) {
6644: if (PetscBTLookup(change_basis,i)) {
6645: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6646: if (PetscBTLookup(qr_needed_idx,i)) {
6647: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6648: } else {
6649: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6650: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6651: }
6652: }
6653: }
6654: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6655: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6656: PetscFree(nnz);
6657: /* Set interior change in the matrix */
6658: if (!pcbddc->benign_change || pcbddc->fake_change) {
6659: for (i=0;i<pcis->n;i++) {
6660: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6661: }
6662: } else {
6663: const PetscInt *ii,*jj;
6664: PetscScalar *aa;
6665: PetscInt n;
6666: PetscBool flg_row;
6667: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6668: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6669: for (i=0;i<n;i++) {
6670: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6671: }
6672: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6673: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6674: }
6676: if (pcbddc->dbg_flag) {
6677: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6678: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6679: }
6682: /* Now we loop on the constraints which need a change of basis */
6683: /*
6684: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6685: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6687: Basic blocks of change of basis matrix T computed by
6689: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6691: | 1 0 ... 0 s_1/S |
6692: | 0 1 ... 0 s_2/S |
6693: | ... |
6694: | 0 ... 1 s_{n-1}/S |
6695: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6697: with S = \sum_{i=1}^n s_i^2
6698: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6699: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6701: - QR decomposition of constraints otherwise
6702: */
6703: if (qr_needed && max_size_of_constraint) {
6704: /* space to store Q */
6705: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6706: /* array to store scaling factors for reflectors */
6707: PetscMalloc1(max_constraints,&qr_tau);
6708: /* first we issue queries for optimal work */
6709: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6710: PetscBLASIntCast(max_constraints,&Blas_N);
6711: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6712: lqr_work = -1;
6713: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6714: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6715: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6716: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6717: lgqr_work = -1;
6718: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6719: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6720: PetscBLASIntCast(max_constraints,&Blas_K);
6721: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6722: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6723: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6724: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6725: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6726: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6727: /* array to store rhs and solution of triangular solver */
6728: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6729: /* allocating workspace for check */
6730: if (pcbddc->dbg_flag) {
6731: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6732: }
6733: }
6734: /* array to store whether a node is primal or not */
6735: PetscBTCreate(pcis->n_B,&is_primal);
6736: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6737: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6738: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6739: for (i=0;i<total_primal_vertices;i++) {
6740: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6741: }
6742: PetscFree(aux_primal_numbering_B);
6744: /* loop on constraints and see whether or not they need a change of basis and compute it */
6745: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6746: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6747: if (PetscBTLookup(change_basis,total_counts)) {
6748: /* get constraint info */
6749: primal_dofs = constraints_n[total_counts];
6750: dual_dofs = size_of_constraint-primal_dofs;
6752: if (pcbddc->dbg_flag) {
6753: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6754: }
6756: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6758: /* copy quadrature constraints for change of basis check */
6759: if (pcbddc->dbg_flag) {
6760: PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6761: }
6762: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6763: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6765: /* compute QR decomposition of constraints */
6766: PetscBLASIntCast(size_of_constraint,&Blas_M);
6767: PetscBLASIntCast(primal_dofs,&Blas_N);
6768: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6769: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6770: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6771: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6772: PetscFPTrapPop();
6774: /* explicitly compute R^-T */
6775: PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6776: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6777: PetscBLASIntCast(primal_dofs,&Blas_N);
6778: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6779: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6780: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6781: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6782: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6783: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6784: PetscFPTrapPop();
6786: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6787: PetscBLASIntCast(size_of_constraint,&Blas_M);
6788: PetscBLASIntCast(size_of_constraint,&Blas_N);
6789: PetscBLASIntCast(primal_dofs,&Blas_K);
6790: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6791: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6792: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6793: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6794: PetscFPTrapPop();
6796: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6797: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6798: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6799: PetscBLASIntCast(size_of_constraint,&Blas_M);
6800: PetscBLASIntCast(primal_dofs,&Blas_N);
6801: PetscBLASIntCast(primal_dofs,&Blas_K);
6802: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6803: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6804: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6805: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6806: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6807: PetscFPTrapPop();
6808: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6810: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6811: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6812: /* insert cols for primal dofs */
6813: for (j=0;j<primal_dofs;j++) {
6814: start_vals = &qr_basis[j*size_of_constraint];
6815: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6816: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6817: }
6818: /* insert cols for dual dofs */
6819: for (j=0,k=0;j<dual_dofs;k++) {
6820: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6821: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6822: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6823: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6824: j++;
6825: }
6826: }
6828: /* check change of basis */
6829: if (pcbddc->dbg_flag) {
6830: PetscInt ii,jj;
6831: PetscBool valid_qr=PETSC_TRUE;
6832: PetscBLASIntCast(primal_dofs,&Blas_M);
6833: PetscBLASIntCast(size_of_constraint,&Blas_N);
6834: PetscBLASIntCast(size_of_constraint,&Blas_K);
6835: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6836: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6837: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6838: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6839: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6840: PetscFPTrapPop();
6841: for (jj=0;jj<size_of_constraint;jj++) {
6842: for (ii=0;ii<primal_dofs;ii++) {
6843: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6844: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6845: }
6846: }
6847: if (!valid_qr) {
6848: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6849: for (jj=0;jj<size_of_constraint;jj++) {
6850: for (ii=0;ii<primal_dofs;ii++) {
6851: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6852: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6853: }
6854: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6855: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6856: }
6857: }
6858: }
6859: } else {
6860: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6861: }
6862: }
6863: } else { /* simple transformation block */
6864: PetscInt row,col;
6865: PetscScalar val,norm;
6867: PetscBLASIntCast(size_of_constraint,&Blas_N);
6868: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6869: for (j=0;j<size_of_constraint;j++) {
6870: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6871: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6872: if (!PetscBTLookup(is_primal,row_B)) {
6873: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6874: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6875: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6876: } else {
6877: for (k=0;k<size_of_constraint;k++) {
6878: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6879: if (row != col) {
6880: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6881: } else {
6882: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6883: }
6884: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6885: }
6886: }
6887: }
6888: if (pcbddc->dbg_flag) {
6889: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6890: }
6891: }
6892: } else {
6893: if (pcbddc->dbg_flag) {
6894: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6895: }
6896: }
6897: }
6899: /* free workspace */
6900: if (qr_needed) {
6901: if (pcbddc->dbg_flag) {
6902: PetscFree(dbg_work);
6903: }
6904: PetscFree(trs_rhs);
6905: PetscFree(qr_tau);
6906: PetscFree(qr_work);
6907: PetscFree(gqr_work);
6908: PetscFree(qr_basis);
6909: }
6910: PetscBTDestroy(&is_primal);
6911: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6912: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6914: /* assembling of global change of variable */
6915: if (!pcbddc->fake_change) {
6916: Mat tmat;
6917: PetscInt bs;
6919: VecGetSize(pcis->vec1_global,&global_size);
6920: VecGetLocalSize(pcis->vec1_global,&local_size);
6921: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6922: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6923: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6924: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6925: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6926: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6927: MatGetBlockSize(pc->pmat,&bs);
6928: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6929: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6930: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6931: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6932: MatDestroy(&tmat);
6933: VecSet(pcis->vec1_global,0.0);
6934: VecSet(pcis->vec1_N,1.0);
6935: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6936: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6937: VecReciprocal(pcis->vec1_global);
6938: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6940: /* check */
6941: if (pcbddc->dbg_flag) {
6942: PetscReal error;
6943: Vec x,x_change;
6945: VecDuplicate(pcis->vec1_global,&x);
6946: VecDuplicate(pcis->vec1_global,&x_change);
6947: VecSetRandom(x,NULL);
6948: VecCopy(x,pcis->vec1_global);
6949: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6950: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6951: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6952: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6953: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6954: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6955: VecAXPY(x,-1.0,x_change);
6956: VecNorm(x,NORM_INFINITY,&error);
6957: if (error > PETSC_SMALL) {
6958: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6959: }
6960: VecDestroy(&x);
6961: VecDestroy(&x_change);
6962: }
6963: /* adapt sub_schurs computed (if any) */
6964: if (pcbddc->use_deluxe_scaling) {
6965: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6967: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6968: if (sub_schurs && sub_schurs->S_Ej_all) {
6969: Mat S_new,tmat;
6970: IS is_all_N,is_V_Sall = NULL;
6972: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6973: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6974: if (pcbddc->deluxe_zerorows) {
6975: ISLocalToGlobalMapping NtoSall;
6976: IS is_V;
6977: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6978: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6979: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6980: ISLocalToGlobalMappingDestroy(&NtoSall);
6981: ISDestroy(&is_V);
6982: }
6983: ISDestroy(&is_all_N);
6984: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6985: MatDestroy(&sub_schurs->S_Ej_all);
6986: PetscObjectReference((PetscObject)S_new);
6987: if (pcbddc->deluxe_zerorows) {
6988: const PetscScalar *array;
6989: const PetscInt *idxs_V,*idxs_all;
6990: PetscInt i,n_V;
6992: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6993: ISGetLocalSize(is_V_Sall,&n_V);
6994: ISGetIndices(is_V_Sall,&idxs_V);
6995: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6996: VecGetArrayRead(pcis->D,&array);
6997: for (i=0;i<n_V;i++) {
6998: PetscScalar val;
6999: PetscInt idx;
7001: idx = idxs_V[i];
7002: val = array[idxs_all[idxs_V[i]]];
7003: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
7004: }
7005: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
7006: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
7007: VecRestoreArrayRead(pcis->D,&array);
7008: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
7009: ISRestoreIndices(is_V_Sall,&idxs_V);
7010: }
7011: sub_schurs->S_Ej_all = S_new;
7012: MatDestroy(&S_new);
7013: if (sub_schurs->sum_S_Ej_all) {
7014: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7015: MatDestroy(&sub_schurs->sum_S_Ej_all);
7016: PetscObjectReference((PetscObject)S_new);
7017: if (pcbddc->deluxe_zerorows) {
7018: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7019: }
7020: sub_schurs->sum_S_Ej_all = S_new;
7021: MatDestroy(&S_new);
7022: }
7023: ISDestroy(&is_V_Sall);
7024: MatDestroy(&tmat);
7025: }
7026: /* destroy any change of basis context in sub_schurs */
7027: if (sub_schurs && sub_schurs->change) {
7028: PetscInt i;
7030: for (i=0;i<sub_schurs->n_subs;i++) {
7031: KSPDestroy(&sub_schurs->change[i]);
7032: }
7033: PetscFree(sub_schurs->change);
7034: }
7035: }
7036: if (pcbddc->switch_static) { /* need to save the local change */
7037: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7038: } else {
7039: MatDestroy(&localChangeOfBasisMatrix);
7040: }
7041: /* determine if any process has changed the pressures locally */
7042: pcbddc->change_interior = pcbddc->benign_have_null;
7043: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7044: MatDestroy(&pcbddc->ConstraintMatrix);
7045: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7046: pcbddc->use_qr_single = qr_needed;
7047: }
7048: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7049: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7050: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7051: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7052: } else {
7053: Mat benign_global = NULL;
7054: if (pcbddc->benign_have_null) {
7055: Mat M;
7057: pcbddc->change_interior = PETSC_TRUE;
7058: VecCopy(matis->counter,pcis->vec1_N);
7059: VecReciprocal(pcis->vec1_N);
7060: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7061: if (pcbddc->benign_change) {
7062: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7063: MatDiagonalScale(M,pcis->vec1_N,NULL);
7064: } else {
7065: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7066: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7067: }
7068: MatISSetLocalMat(benign_global,M);
7069: MatDestroy(&M);
7070: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7071: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7072: }
7073: if (pcbddc->user_ChangeOfBasisMatrix) {
7074: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7075: MatDestroy(&benign_global);
7076: } else if (pcbddc->benign_have_null) {
7077: pcbddc->ChangeOfBasisMatrix = benign_global;
7078: }
7079: }
7080: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7081: IS is_global;
7082: const PetscInt *gidxs;
7084: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
7085: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7086: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7087: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7088: ISDestroy(&is_global);
7089: }
7090: }
7091: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7092: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7093: }
7095: if (!pcbddc->fake_change) {
7096: /* add pressure dofs to set of primal nodes for numbering purposes */
7097: for (i=0;i<pcbddc->benign_n;i++) {
7098: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7099: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7100: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7101: pcbddc->local_primal_size_cc++;
7102: pcbddc->local_primal_size++;
7103: }
7105: /* check if a new primal space has been introduced (also take into account benign trick) */
7106: pcbddc->new_primal_space_local = PETSC_TRUE;
7107: if (olocal_primal_size == pcbddc->local_primal_size) {
7108: PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7109: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7110: if (!pcbddc->new_primal_space_local) {
7111: PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7112: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7113: }
7114: }
7115: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7116: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7117: }
7118: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7120: /* flush dbg viewer */
7121: if (pcbddc->dbg_flag) {
7122: PetscViewerFlush(pcbddc->dbg_viewer);
7123: }
7125: /* free workspace */
7126: PetscBTDestroy(&qr_needed_idx);
7127: PetscBTDestroy(&change_basis);
7128: if (!pcbddc->adaptive_selection) {
7129: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7130: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7131: } else {
7132: PetscFree5(pcbddc->adaptive_constraints_n,
7133: pcbddc->adaptive_constraints_idxs_ptr,
7134: pcbddc->adaptive_constraints_data_ptr,
7135: pcbddc->adaptive_constraints_idxs,
7136: pcbddc->adaptive_constraints_data);
7137: PetscFree(constraints_n);
7138: PetscFree(constraints_idxs_B);
7139: }
7140: return(0);
7141: }
7142: /* #undef PETSC_MISSING_LAPACK_GESVD */
7144: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7145: {
7146: ISLocalToGlobalMapping map;
7147: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7148: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7149: PetscInt i,N;
7150: PetscBool rcsr = PETSC_FALSE;
7151: PetscErrorCode ierr;
7154: if (pcbddc->recompute_topography) {
7155: pcbddc->graphanalyzed = PETSC_FALSE;
7156: /* Reset previously computed graph */
7157: PCBDDCGraphReset(pcbddc->mat_graph);
7158: /* Init local Graph struct */
7159: MatGetSize(pc->pmat,&N,NULL);
7160: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7161: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7163: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7164: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7165: }
7166: /* Check validity of the csr graph passed in by the user */
7167: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7169: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7170: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7171: PetscInt *xadj,*adjncy;
7172: PetscInt nvtxs;
7173: PetscBool flg_row=PETSC_FALSE;
7175: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7176: if (flg_row) {
7177: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7178: pcbddc->computed_rowadj = PETSC_TRUE;
7179: }
7180: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7181: rcsr = PETSC_TRUE;
7182: }
7183: if (pcbddc->dbg_flag) {
7184: PetscViewerFlush(pcbddc->dbg_viewer);
7185: }
7187: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7188: PetscReal *lcoords;
7189: PetscInt n;
7190: MPI_Datatype dimrealtype;
7192: /* TODO: support for blocked */
7193: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7194: MatGetLocalSize(matis->A,&n,NULL);
7195: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7196: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7197: MPI_Type_commit(&dimrealtype);
7198: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7199: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7200: MPI_Type_free(&dimrealtype);
7201: PetscFree(pcbddc->mat_graph->coords);
7203: pcbddc->mat_graph->coords = lcoords;
7204: pcbddc->mat_graph->cloc = PETSC_TRUE;
7205: pcbddc->mat_graph->cnloc = n;
7206: }
7207: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7208: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
7210: /* Setup of Graph */
7211: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7212: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7214: /* attach info on disconnected subdomains if present */
7215: if (pcbddc->n_local_subs) {
7216: PetscInt *local_subs,n,totn;
7218: MatGetLocalSize(matis->A,&n,NULL);
7219: PetscMalloc1(n,&local_subs);
7220: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7221: for (i=0;i<pcbddc->n_local_subs;i++) {
7222: const PetscInt *idxs;
7223: PetscInt nl,j;
7225: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7226: ISGetIndices(pcbddc->local_subs[i],&idxs);
7227: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7228: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7229: }
7230: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7231: pcbddc->mat_graph->n_local_subs = totn + 1;
7232: pcbddc->mat_graph->local_subs = local_subs;
7233: }
7234: }
7236: if (!pcbddc->graphanalyzed) {
7237: /* Graph's connected components analysis */
7238: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7239: pcbddc->graphanalyzed = PETSC_TRUE;
7240: pcbddc->corner_selected = pcbddc->corner_selection;
7241: }
7242: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7243: return(0);
7244: }
7246: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7247: {
7248: PetscInt i,j,n;
7249: PetscScalar *alphas;
7250: PetscReal norm,*onorms;
7254: n = *nio;
7255: if (!n) return(0);
7256: PetscMalloc2(n,&alphas,n,&onorms);
7257: VecNormalize(vecs[0],&norm);
7258: if (norm < PETSC_SMALL) {
7259: onorms[0] = 0.0;
7260: VecSet(vecs[0],0.0);
7261: } else {
7262: onorms[0] = norm;
7263: }
7265: for (i=1;i<n;i++) {
7266: VecMDot(vecs[i],i,vecs,alphas);
7267: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7268: VecMAXPY(vecs[i],i,alphas,vecs);
7269: VecNormalize(vecs[i],&norm);
7270: if (norm < PETSC_SMALL) {
7271: onorms[i] = 0.0;
7272: VecSet(vecs[i],0.0);
7273: } else {
7274: onorms[i] = norm;
7275: }
7276: }
7277: /* push nonzero vectors at the beginning */
7278: for (i=0;i<n;i++) {
7279: if (onorms[i] == 0.0) {
7280: for (j=i+1;j<n;j++) {
7281: if (onorms[j] != 0.0) {
7282: VecCopy(vecs[j],vecs[i]);
7283: onorms[j] = 0.0;
7284: }
7285: }
7286: }
7287: }
7288: for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7289: PetscFree2(alphas,onorms);
7290: return(0);
7291: }
7293: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7294: {
7295: Mat A;
7296: PetscInt n_neighs,*neighs,*n_shared,**shared;
7297: PetscMPIInt size,rank,color;
7298: PetscInt *xadj,*adjncy;
7299: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7300: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7301: PetscInt void_procs,*procs_candidates = NULL;
7302: PetscInt xadj_count,*count;
7303: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7304: PetscSubcomm psubcomm;
7305: MPI_Comm subcomm;
7310: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7311: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7314: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7316: if (have_void) *have_void = PETSC_FALSE;
7317: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7318: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7319: MatISGetLocalMat(mat,&A);
7320: MatGetLocalSize(A,&n,NULL);
7321: im_active = !!n;
7322: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7323: void_procs = size - active_procs;
7324: /* get ranks of non-active processes in mat communicator */
7325: if (void_procs) {
7326: PetscInt ncand;
7328: if (have_void) *have_void = PETSC_TRUE;
7329: PetscMalloc1(size,&procs_candidates);
7330: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7331: for (i=0,ncand=0;i<size;i++) {
7332: if (!procs_candidates[i]) {
7333: procs_candidates[ncand++] = i;
7334: }
7335: }
7336: /* force n_subdomains to be not greater than the number of non-active processes */
7337: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7338: }
7340: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7341: number of subdomains requested 1 -> send to master or first candidate in voids */
7342: MatGetSize(mat,&N,NULL);
7343: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7344: PetscInt issize,isidx,dest;
7345: if (*n_subdomains == 1) dest = 0;
7346: else dest = rank;
7347: if (im_active) {
7348: issize = 1;
7349: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7350: isidx = procs_candidates[dest];
7351: } else {
7352: isidx = dest;
7353: }
7354: } else {
7355: issize = 0;
7356: isidx = -1;
7357: }
7358: if (*n_subdomains != 1) *n_subdomains = active_procs;
7359: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7360: PetscFree(procs_candidates);
7361: return(0);
7362: }
7363: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7364: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7365: threshold = PetscMax(threshold,2);
7367: /* Get info on mapping */
7368: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7370: /* build local CSR graph of subdomains' connectivity */
7371: PetscMalloc1(2,&xadj);
7372: xadj[0] = 0;
7373: xadj[1] = PetscMax(n_neighs-1,0);
7374: PetscMalloc1(xadj[1],&adjncy);
7375: PetscMalloc1(xadj[1],&adjncy_wgt);
7376: PetscCalloc1(n,&count);
7377: for (i=1;i<n_neighs;i++)
7378: for (j=0;j<n_shared[i];j++)
7379: count[shared[i][j]] += 1;
7381: xadj_count = 0;
7382: for (i=1;i<n_neighs;i++) {
7383: for (j=0;j<n_shared[i];j++) {
7384: if (count[shared[i][j]] < threshold) {
7385: adjncy[xadj_count] = neighs[i];
7386: adjncy_wgt[xadj_count] = n_shared[i];
7387: xadj_count++;
7388: break;
7389: }
7390: }
7391: }
7392: xadj[1] = xadj_count;
7393: PetscFree(count);
7394: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7395: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7397: PetscMalloc1(1,&ranks_send_to_idx);
7399: /* Restrict work on active processes only */
7400: PetscMPIIntCast(im_active,&color);
7401: if (void_procs) {
7402: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7403: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7404: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7405: subcomm = PetscSubcommChild(psubcomm);
7406: } else {
7407: psubcomm = NULL;
7408: subcomm = PetscObjectComm((PetscObject)mat);
7409: }
7411: v_wgt = NULL;
7412: if (!color) {
7413: PetscFree(xadj);
7414: PetscFree(adjncy);
7415: PetscFree(adjncy_wgt);
7416: } else {
7417: Mat subdomain_adj;
7418: IS new_ranks,new_ranks_contig;
7419: MatPartitioning partitioner;
7420: PetscInt rstart=0,rend=0;
7421: PetscInt *is_indices,*oldranks;
7422: PetscMPIInt size;
7423: PetscBool aggregate;
7425: MPI_Comm_size(subcomm,&size);
7426: if (void_procs) {
7427: PetscInt prank = rank;
7428: PetscMalloc1(size,&oldranks);
7429: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7430: for (i=0;i<xadj[1];i++) {
7431: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7432: }
7433: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7434: } else {
7435: oldranks = NULL;
7436: }
7437: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7438: if (aggregate) { /* TODO: all this part could be made more efficient */
7439: PetscInt lrows,row,ncols,*cols;
7440: PetscMPIInt nrank;
7441: PetscScalar *vals;
7443: MPI_Comm_rank(subcomm,&nrank);
7444: lrows = 0;
7445: if (nrank<redprocs) {
7446: lrows = size/redprocs;
7447: if (nrank<size%redprocs) lrows++;
7448: }
7449: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7450: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7451: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7452: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7453: row = nrank;
7454: ncols = xadj[1]-xadj[0];
7455: cols = adjncy;
7456: PetscMalloc1(ncols,&vals);
7457: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7458: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7459: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7460: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7461: PetscFree(xadj);
7462: PetscFree(adjncy);
7463: PetscFree(adjncy_wgt);
7464: PetscFree(vals);
7465: if (use_vwgt) {
7466: Vec v;
7467: const PetscScalar *array;
7468: PetscInt nl;
7470: MatCreateVecs(subdomain_adj,&v,NULL);
7471: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7472: VecAssemblyBegin(v);
7473: VecAssemblyEnd(v);
7474: VecGetLocalSize(v,&nl);
7475: VecGetArrayRead(v,&array);
7476: PetscMalloc1(nl,&v_wgt);
7477: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7478: VecRestoreArrayRead(v,&array);
7479: VecDestroy(&v);
7480: }
7481: } else {
7482: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7483: if (use_vwgt) {
7484: PetscMalloc1(1,&v_wgt);
7485: v_wgt[0] = n;
7486: }
7487: }
7488: /* MatView(subdomain_adj,0); */
7490: /* Partition */
7491: MatPartitioningCreate(subcomm,&partitioner);
7492: #if defined(PETSC_HAVE_PTSCOTCH)
7493: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7494: #elif defined(PETSC_HAVE_PARMETIS)
7495: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7496: #else
7497: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7498: #endif
7499: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7500: if (v_wgt) {
7501: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7502: }
7503: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7504: MatPartitioningSetNParts(partitioner,*n_subdomains);
7505: MatPartitioningSetFromOptions(partitioner);
7506: MatPartitioningApply(partitioner,&new_ranks);
7507: /* MatPartitioningView(partitioner,0); */
7509: /* renumber new_ranks to avoid "holes" in new set of processors */
7510: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7511: ISDestroy(&new_ranks);
7512: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7513: if (!aggregate) {
7514: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7515: #if defined(PETSC_USE_DEBUG)
7516: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7517: #endif
7518: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7519: } else if (oldranks) {
7520: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7521: } else {
7522: ranks_send_to_idx[0] = is_indices[0];
7523: }
7524: } else {
7525: PetscInt idx = 0;
7526: PetscMPIInt tag;
7527: MPI_Request *reqs;
7529: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7530: PetscMalloc1(rend-rstart,&reqs);
7531: for (i=rstart;i<rend;i++) {
7532: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7533: }
7534: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7535: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7536: PetscFree(reqs);
7537: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7538: #if defined(PETSC_USE_DEBUG)
7539: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7540: #endif
7541: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7542: } else if (oldranks) {
7543: ranks_send_to_idx[0] = oldranks[idx];
7544: } else {
7545: ranks_send_to_idx[0] = idx;
7546: }
7547: }
7548: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7549: /* clean up */
7550: PetscFree(oldranks);
7551: ISDestroy(&new_ranks_contig);
7552: MatDestroy(&subdomain_adj);
7553: MatPartitioningDestroy(&partitioner);
7554: }
7555: PetscSubcommDestroy(&psubcomm);
7556: PetscFree(procs_candidates);
7558: /* assemble parallel IS for sends */
7559: i = 1;
7560: if (!color) i=0;
7561: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7562: return(0);
7563: }
/*
   MatTypePrivate - integer tag identifying the storage format of a local matrix.

   PCBDDCMatISSubassemble() stores this tag as the first entry of the index buffer
   it sends, and the receiver reads it back to decide how to interpret the values
   and which sequential matrix type to create. The numeric values travel through
   MPI messages as PetscInt, so they are spelled out explicitly here.
*/
typedef enum {
  MATDENSE_PRIVATE = 0, /* local matrix stored as dense */
  MATAIJ_PRIVATE   = 1, /* local matrix stored as AIJ */
  MATBAIJ_PRIVATE  = 2, /* local matrix stored as BAIJ */
  MATSBAIJ_PRIVATE = 3  /* local matrix stored as SBAIJ */
} MatTypePrivate;
7567: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7568: {
7569: Mat local_mat;
7570: IS is_sends_internal;
7571: PetscInt rows,cols,new_local_rows;
7572: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7573: PetscBool ismatis,isdense,newisdense,destroy_mat;
7574: ISLocalToGlobalMapping l2gmap;
7575: PetscInt* l2gmap_indices;
7576: const PetscInt* is_indices;
7577: MatType new_local_type;
7578: /* buffers */
7579: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7580: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7581: PetscInt *recv_buffer_idxs_local;
7582: PetscScalar *ptr_vals,*recv_buffer_vals;
7583: const PetscScalar *send_buffer_vals;
7584: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7585: /* MPI */
7586: MPI_Comm comm,comm_n;
7587: PetscSubcomm subcomm;
7588: PetscMPIInt n_sends,n_recvs,size;
7589: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7590: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7591: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7592: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7593: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7594: PetscErrorCode ierr;
7598: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7599: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7606: if (nvecs) {
7607: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7609: }
7610: /* further checks */
7611: MatISGetLocalMat(mat,&local_mat);
7612: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7613: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7614: MatGetSize(local_mat,&rows,&cols);
7615: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7616: if (reuse && *mat_n) {
7617: PetscInt mrows,mcols,mnrows,mncols;
7619: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7620: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7621: MatGetSize(mat,&mrows,&mcols);
7622: MatGetSize(*mat_n,&mnrows,&mncols);
7623: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7624: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7625: }
7626: MatGetBlockSize(local_mat,&bs);
7629: /* prepare IS for sending if not provided */
7630: if (!is_sends) {
7631: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7632: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7633: } else {
7634: PetscObjectReference((PetscObject)is_sends);
7635: is_sends_internal = is_sends;
7636: }
7638: /* get comm */
7639: PetscObjectGetComm((PetscObject)mat,&comm);
7641: /* compute number of sends */
7642: ISGetLocalSize(is_sends_internal,&i);
7643: PetscMPIIntCast(i,&n_sends);
7645: /* compute number of receives */
7646: MPI_Comm_size(comm,&size);
7647: PetscMalloc1(size,&iflags);
7648: PetscArrayzero(iflags,size);
7649: ISGetIndices(is_sends_internal,&is_indices);
7650: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7651: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7652: PetscFree(iflags);
7654: /* restrict comm if requested */
7655: subcomm = 0;
7656: destroy_mat = PETSC_FALSE;
7657: if (restrict_comm) {
7658: PetscMPIInt color,subcommsize;
7660: color = 0;
7661: if (restrict_full) {
7662: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7663: } else {
7664: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7665: }
7666: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7667: subcommsize = size - subcommsize;
7668: /* check if reuse has been requested */
7669: if (reuse) {
7670: if (*mat_n) {
7671: PetscMPIInt subcommsize2;
7672: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7673: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7674: comm_n = PetscObjectComm((PetscObject)*mat_n);
7675: } else {
7676: comm_n = PETSC_COMM_SELF;
7677: }
7678: } else { /* MAT_INITIAL_MATRIX */
7679: PetscMPIInt rank;
7681: MPI_Comm_rank(comm,&rank);
7682: PetscSubcommCreate(comm,&subcomm);
7683: PetscSubcommSetNumber(subcomm,2);
7684: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7685: comm_n = PetscSubcommChild(subcomm);
7686: }
7687: /* flag to destroy *mat_n if not significative */
7688: if (color) destroy_mat = PETSC_TRUE;
7689: } else {
7690: comm_n = comm;
7691: }
7693: /* prepare send/receive buffers */
7694: PetscMalloc1(size,&ilengths_idxs);
7695: PetscArrayzero(ilengths_idxs,size);
7696: PetscMalloc1(size,&ilengths_vals);
7697: PetscArrayzero(ilengths_vals,size);
7698: if (nis) {
7699: PetscCalloc1(size,&ilengths_idxs_is);
7700: }
7702: /* Get data from local matrices */
7703: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7704: /* TODO: See below some guidelines on how to prepare the local buffers */
7705: /*
7706: send_buffer_vals should contain the raw values of the local matrix
7707: send_buffer_idxs should contain:
7708: - MatType_PRIVATE type
7709: - PetscInt size_of_l2gmap
7710: - PetscInt global_row_indices[size_of_l2gmap]
7711: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7712: */
7713: else {
7714: MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7715: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7716: PetscMalloc1(i+2,&send_buffer_idxs);
7717: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7718: send_buffer_idxs[1] = i;
7719: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7720: PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7721: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7722: PetscMPIIntCast(i,&len);
7723: for (i=0;i<n_sends;i++) {
7724: ilengths_vals[is_indices[i]] = len*len;
7725: ilengths_idxs[is_indices[i]] = len+2;
7726: }
7727: }
7728: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7729: /* additional is (if any) */
7730: if (nis) {
7731: PetscMPIInt psum;
7732: PetscInt j;
7733: for (j=0,psum=0;j<nis;j++) {
7734: PetscInt plen;
7735: ISGetLocalSize(isarray[j],&plen);
7736: PetscMPIIntCast(plen,&len);
7737: psum += len+1; /* indices + lenght */
7738: }
7739: PetscMalloc1(psum,&send_buffer_idxs_is);
7740: for (j=0,psum=0;j<nis;j++) {
7741: PetscInt plen;
7742: const PetscInt *is_array_idxs;
7743: ISGetLocalSize(isarray[j],&plen);
7744: send_buffer_idxs_is[psum] = plen;
7745: ISGetIndices(isarray[j],&is_array_idxs);
7746: PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7747: ISRestoreIndices(isarray[j],&is_array_idxs);
7748: psum += plen+1; /* indices + lenght */
7749: }
7750: for (i=0;i<n_sends;i++) {
7751: ilengths_idxs_is[is_indices[i]] = psum;
7752: }
7753: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7754: }
7755: MatISRestoreLocalMat(mat,&local_mat);
7757: buf_size_idxs = 0;
7758: buf_size_vals = 0;
7759: buf_size_idxs_is = 0;
7760: buf_size_vecs = 0;
7761: for (i=0;i<n_recvs;i++) {
7762: buf_size_idxs += (PetscInt)olengths_idxs[i];
7763: buf_size_vals += (PetscInt)olengths_vals[i];
7764: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7765: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7766: }
7767: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7768: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7769: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7770: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7772: /* get new tags for clean communications */
7773: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7774: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7775: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7776: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7778: /* allocate for requests */
7779: PetscMalloc1(n_sends,&send_req_idxs);
7780: PetscMalloc1(n_sends,&send_req_vals);
7781: PetscMalloc1(n_sends,&send_req_idxs_is);
7782: PetscMalloc1(n_sends,&send_req_vecs);
7783: PetscMalloc1(n_recvs,&recv_req_idxs);
7784: PetscMalloc1(n_recvs,&recv_req_vals);
7785: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7786: PetscMalloc1(n_recvs,&recv_req_vecs);
7788: /* communications */
7789: ptr_idxs = recv_buffer_idxs;
7790: ptr_vals = recv_buffer_vals;
7791: ptr_idxs_is = recv_buffer_idxs_is;
7792: ptr_vecs = recv_buffer_vecs;
7793: for (i=0;i<n_recvs;i++) {
7794: source_dest = onodes[i];
7795: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7796: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7797: ptr_idxs += olengths_idxs[i];
7798: ptr_vals += olengths_vals[i];
7799: if (nis) {
7800: source_dest = onodes_is[i];
7801: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7802: ptr_idxs_is += olengths_idxs_is[i];
7803: }
7804: if (nvecs) {
7805: source_dest = onodes[i];
7806: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7807: ptr_vecs += olengths_idxs[i]-2;
7808: }
7809: }
7810: for (i=0;i<n_sends;i++) {
7811: PetscMPIIntCast(is_indices[i],&source_dest);
7812: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7813: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7814: if (nis) {
7815: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7816: }
7817: if (nvecs) {
7818: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7819: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7820: }
7821: }
7822: ISRestoreIndices(is_sends_internal,&is_indices);
7823: ISDestroy(&is_sends_internal);
7825: /* assemble new l2g map */
7826: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7827: ptr_idxs = recv_buffer_idxs;
7828: new_local_rows = 0;
7829: for (i=0;i<n_recvs;i++) {
7830: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7831: ptr_idxs += olengths_idxs[i];
7832: }
7833: PetscMalloc1(new_local_rows,&l2gmap_indices);
7834: ptr_idxs = recv_buffer_idxs;
7835: new_local_rows = 0;
7836: for (i=0;i<n_recvs;i++) {
7837: PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7838: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7839: ptr_idxs += olengths_idxs[i];
7840: }
7841: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7842: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7843: PetscFree(l2gmap_indices);
7845: /* infer new local matrix type from received local matrices type */
7846: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7847: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7848: if (n_recvs) {
7849: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7850: ptr_idxs = recv_buffer_idxs;
7851: for (i=0;i<n_recvs;i++) {
7852: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7853: new_local_type_private = MATAIJ_PRIVATE;
7854: break;
7855: }
7856: ptr_idxs += olengths_idxs[i];
7857: }
7858: switch (new_local_type_private) {
7859: case MATDENSE_PRIVATE:
7860: new_local_type = MATSEQAIJ;
7861: bs = 1;
7862: break;
7863: case MATAIJ_PRIVATE:
7864: new_local_type = MATSEQAIJ;
7865: bs = 1;
7866: break;
7867: case MATBAIJ_PRIVATE:
7868: new_local_type = MATSEQBAIJ;
7869: break;
7870: case MATSBAIJ_PRIVATE:
7871: new_local_type = MATSEQSBAIJ;
7872: break;
7873: default:
7874: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7875: break;
7876: }
7877: } else { /* by default, new_local_type is seqaij */
7878: new_local_type = MATSEQAIJ;
7879: bs = 1;
7880: }
7882: /* create MATIS object if needed */
7883: if (!reuse) {
7884: MatGetSize(mat,&rows,&cols);
7885: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7886: } else {
7887: /* it also destroys the local matrices */
7888: if (*mat_n) {
7889: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7890: } else { /* this is a fake object */
7891: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7892: }
7893: }
7894: MatISGetLocalMat(*mat_n,&local_mat);
7895: MatSetType(local_mat,new_local_type);
7897: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7899: /* Global to local map of received indices */
7900: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7901: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7902: ISLocalToGlobalMappingDestroy(&l2gmap);
7904: /* restore attributes -> type of incoming data and its size */
7905: buf_size_idxs = 0;
7906: for (i=0;i<n_recvs;i++) {
7907: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7908: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7909: buf_size_idxs += (PetscInt)olengths_idxs[i];
7910: }
7911: PetscFree(recv_buffer_idxs);
7913: /* set preallocation */
7914: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7915: if (!newisdense) {
7916: PetscInt *new_local_nnz=0;
7918: ptr_idxs = recv_buffer_idxs_local;
7919: if (n_recvs) {
7920: PetscCalloc1(new_local_rows,&new_local_nnz);
7921: }
7922: for (i=0;i<n_recvs;i++) {
7923: PetscInt j;
7924: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7925: for (j=0;j<*(ptr_idxs+1);j++) {
7926: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7927: }
7928: } else {
7929: /* TODO */
7930: }
7931: ptr_idxs += olengths_idxs[i];
7932: }
7933: if (new_local_nnz) {
7934: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7935: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7936: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7937: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7938: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7939: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7940: } else {
7941: MatSetUp(local_mat);
7942: }
7943: PetscFree(new_local_nnz);
7944: } else {
7945: MatSetUp(local_mat);
7946: }
7948: /* set values */
7949: ptr_vals = recv_buffer_vals;
7950: ptr_idxs = recv_buffer_idxs_local;
7951: for (i=0;i<n_recvs;i++) {
7952: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7953: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7954: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7955: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7956: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7957: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7958: } else {
7959: /* TODO */
7960: }
7961: ptr_idxs += olengths_idxs[i];
7962: ptr_vals += olengths_vals[i];
7963: }
7964: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7965: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7966: MatISRestoreLocalMat(*mat_n,&local_mat);
7967: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7968: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7969: PetscFree(recv_buffer_vals);
7971: #if 0
7972: if (!restrict_comm) { /* check */
7973: Vec lvec,rvec;
7974: PetscReal infty_error;
7976: MatCreateVecs(mat,&rvec,&lvec);
7977: VecSetRandom(rvec,NULL);
7978: MatMult(mat,rvec,lvec);
7979: VecScale(lvec,-1.0);
7980: MatMultAdd(*mat_n,rvec,lvec,lvec);
7981: VecNorm(lvec,NORM_INFINITY,&infty_error);
7982: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7983: VecDestroy(&rvec);
7984: VecDestroy(&lvec);
7985: }
7986: #endif
7988: /* assemble new additional is (if any) */
7989: if (nis) {
7990: PetscInt **temp_idxs,*count_is,j,psum;
7992: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7993: PetscCalloc1(nis,&count_is);
7994: ptr_idxs = recv_buffer_idxs_is;
7995: psum = 0;
7996: for (i=0;i<n_recvs;i++) {
7997: for (j=0;j<nis;j++) {
7998: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7999: count_is[j] += plen; /* increment counting of buffer for j-th IS */
8000: psum += plen;
8001: ptr_idxs += plen+1; /* shift pointer to received data */
8002: }
8003: }
8004: PetscMalloc1(nis,&temp_idxs);
8005: PetscMalloc1(psum,&temp_idxs[0]);
8006: for (i=1;i<nis;i++) {
8007: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
8008: }
8009: PetscArrayzero(count_is,nis);
8010: ptr_idxs = recv_buffer_idxs_is;
8011: for (i=0;i<n_recvs;i++) {
8012: for (j=0;j<nis;j++) {
8013: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8014: PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
8015: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8016: ptr_idxs += plen+1; /* shift pointer to received data */
8017: }
8018: }
8019: for (i=0;i<nis;i++) {
8020: ISDestroy(&isarray[i]);
8021: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
8022: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
8023: }
8024: PetscFree(count_is);
8025: PetscFree(temp_idxs[0]);
8026: PetscFree(temp_idxs);
8027: }
8028: /* free workspace */
8029: PetscFree(recv_buffer_idxs_is);
8030: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
8031: PetscFree(send_buffer_idxs);
8032: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
8033: if (isdense) {
8034: MatISGetLocalMat(mat,&local_mat);
8035: MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
8036: MatISRestoreLocalMat(mat,&local_mat);
8037: } else {
8038: /* PetscFree(send_buffer_vals); */
8039: }
8040: if (nis) {
8041: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
8042: PetscFree(send_buffer_idxs_is);
8043: }
8045: if (nvecs) {
8046: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8047: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8048: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8049: VecDestroy(&nnsp_vec[0]);
8050: VecCreate(comm_n,&nnsp_vec[0]);
8051: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8052: VecSetType(nnsp_vec[0],VECSTANDARD);
8053: /* set values */
8054: ptr_vals = recv_buffer_vecs;
8055: ptr_idxs = recv_buffer_idxs_local;
8056: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8057: for (i=0;i<n_recvs;i++) {
8058: PetscInt j;
8059: for (j=0;j<*(ptr_idxs+1);j++) {
8060: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8061: }
8062: ptr_idxs += olengths_idxs[i];
8063: ptr_vals += olengths_idxs[i]-2;
8064: }
8065: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8066: VecAssemblyBegin(nnsp_vec[0]);
8067: VecAssemblyEnd(nnsp_vec[0]);
8068: }
8070: PetscFree(recv_buffer_vecs);
8071: PetscFree(recv_buffer_idxs_local);
8072: PetscFree(recv_req_idxs);
8073: PetscFree(recv_req_vals);
8074: PetscFree(recv_req_vecs);
8075: PetscFree(recv_req_idxs_is);
8076: PetscFree(send_req_idxs);
8077: PetscFree(send_req_vals);
8078: PetscFree(send_req_vecs);
8079: PetscFree(send_req_idxs_is);
8080: PetscFree(ilengths_vals);
8081: PetscFree(ilengths_idxs);
8082: PetscFree(olengths_vals);
8083: PetscFree(olengths_idxs);
8084: PetscFree(onodes);
8085: if (nis) {
8086: PetscFree(ilengths_idxs_is);
8087: PetscFree(olengths_idxs_is);
8088: PetscFree(onodes_is);
8089: }
8090: PetscSubcommDestroy(&subcomm);
8091: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
8092: MatDestroy(mat_n);
8093: for (i=0;i<nis;i++) {
8094: ISDestroy(&isarray[i]);
8095: }
8096: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8097: VecDestroy(&nnsp_vec[0]);
8098: }
8099: *mat_n = NULL;
8100: }
8101: return(0);
8102: }
8104: /* temporary hack into ksp private data structure */
8105: #include <petsc/private/kspimpl.h>
8107: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8108: {
8109: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8110: PC_IS *pcis = (PC_IS*)pc->data;
8111: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8112: Mat coarsedivudotp = NULL;
8113: Mat coarseG,t_coarse_mat_is;
8114: MatNullSpace CoarseNullSpace = NULL;
8115: ISLocalToGlobalMapping coarse_islg;
8116: IS coarse_is,*isarray,corners;
8117: PetscInt i,im_active=-1,active_procs=-1;
8118: PetscInt nis,nisdofs,nisneu,nisvert;
8119: PetscInt coarse_eqs_per_proc;
8120: PC pc_temp;
8121: PCType coarse_pc_type;
8122: KSPType coarse_ksp_type;
8123: PetscBool multilevel_requested,multilevel_allowed;
8124: PetscBool coarse_reuse;
8125: PetscInt ncoarse,nedcfield;
8126: PetscBool compute_vecs = PETSC_FALSE;
8127: PetscScalar *array;
8128: MatReuse coarse_mat_reuse;
8129: PetscBool restr, full_restr, have_void;
8130: PetscMPIInt size;
8131: PetscErrorCode ierr;
8134: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8135: /* Assign global numbering to coarse dofs */
8136: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8137: PetscInt ocoarse_size;
8138: compute_vecs = PETSC_TRUE;
8140: pcbddc->new_primal_space = PETSC_TRUE;
8141: ocoarse_size = pcbddc->coarse_size;
8142: PetscFree(pcbddc->global_primal_indices);
8143: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8144: /* see if we can avoid some work */
8145: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8146: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8147: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8148: KSPReset(pcbddc->coarse_ksp);
8149: coarse_reuse = PETSC_FALSE;
8150: } else { /* we can safely reuse already computed coarse matrix */
8151: coarse_reuse = PETSC_TRUE;
8152: }
8153: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8154: coarse_reuse = PETSC_FALSE;
8155: }
8156: /* reset any subassembling information */
8157: if (!coarse_reuse || pcbddc->recompute_topography) {
8158: ISDestroy(&pcbddc->coarse_subassembling);
8159: }
8160: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8161: coarse_reuse = PETSC_TRUE;
8162: }
8163: if (coarse_reuse && pcbddc->coarse_ksp) {
8164: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8165: PetscObjectReference((PetscObject)coarse_mat);
8166: coarse_mat_reuse = MAT_REUSE_MATRIX;
8167: } else {
8168: coarse_mat = NULL;
8169: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8170: }
8172: /* creates temporary l2gmap and IS for coarse indexes */
8173: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8174: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8176: /* creates temporary MATIS object for coarse matrix */
8177: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8178: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8179: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8180: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8181: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8182: MatDestroy(&coarse_submat_dense);
8184: /* count "active" (i.e. with positive local size) and "void" processes */
8185: im_active = !!(pcis->n);
8186: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8188: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8189: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8190: /* full_restr : just use the receivers from the subassembling pattern */
8191: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8192: coarse_mat_is = NULL;
8193: multilevel_allowed = PETSC_FALSE;
8194: multilevel_requested = PETSC_FALSE;
8195: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8196: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8197: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8198: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8199: if (multilevel_requested) {
8200: ncoarse = active_procs/pcbddc->coarsening_ratio;
8201: restr = PETSC_FALSE;
8202: full_restr = PETSC_FALSE;
8203: } else {
8204: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8205: restr = PETSC_TRUE;
8206: full_restr = PETSC_TRUE;
8207: }
8208: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8209: ncoarse = PetscMax(1,ncoarse);
8210: if (!pcbddc->coarse_subassembling) {
8211: if (pcbddc->coarsening_ratio > 1) {
8212: if (multilevel_requested) {
8213: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8214: } else {
8215: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8216: }
8217: } else {
8218: PetscMPIInt rank;
8220: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8221: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8222: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8223: }
8224: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8225: PetscInt psum;
8226: if (pcbddc->coarse_ksp) psum = 1;
8227: else psum = 0;
8228: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8229: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8230: }
8231: /* determine if we can go multilevel */
8232: if (multilevel_requested) {
8233: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8234: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8235: }
8236: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8238: /* dump subassembling pattern */
8239: if (pcbddc->dbg_flag && multilevel_allowed) {
8240: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8241: }
8242: /* compute dofs splitting and neumann boundaries for coarse dofs */
8243: nedcfield = -1;
8244: corners = NULL;
8245: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8246: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8247: const PetscInt *idxs;
8248: ISLocalToGlobalMapping tmap;
8250: /* create map between primal indices (in local representative ordering) and local primal numbering */
8251: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8252: /* allocate space for temporary storage */
8253: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8254: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8255: /* allocate for IS array */
8256: nisdofs = pcbddc->n_ISForDofsLocal;
8257: if (pcbddc->nedclocal) {
8258: if (pcbddc->nedfield > -1) {
8259: nedcfield = pcbddc->nedfield;
8260: } else {
8261: nedcfield = 0;
8262: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8263: nisdofs = 1;
8264: }
8265: }
8266: nisneu = !!pcbddc->NeumannBoundariesLocal;
8267: nisvert = 0; /* nisvert is not used */
8268: nis = nisdofs + nisneu + nisvert;
8269: PetscMalloc1(nis,&isarray);
8270: /* dofs splitting */
8271: for (i=0;i<nisdofs;i++) {
8272: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8273: if (nedcfield != i) {
8274: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8275: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8276: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8277: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8278: } else {
8279: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8280: ISGetIndices(pcbddc->nedclocal,&idxs);
8281: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8282: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8283: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8284: }
8285: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8286: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8287: /* ISView(isarray[i],0); */
8288: }
8289: /* neumann boundaries */
8290: if (pcbddc->NeumannBoundariesLocal) {
8291: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8292: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8293: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8294: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8295: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8296: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8297: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8298: /* ISView(isarray[nisdofs],0); */
8299: }
8300: /* coordinates */
8301: if (pcbddc->corner_selected) {
8302: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8303: ISGetLocalSize(corners,&tsize);
8304: ISGetIndices(corners,&idxs);
8305: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8306: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8307: ISRestoreIndices(corners,&idxs);
8308: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8309: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8310: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8311: }
8312: PetscFree(tidxs);
8313: PetscFree(tidxs2);
8314: ISLocalToGlobalMappingDestroy(&tmap);
8315: } else {
8316: nis = 0;
8317: nisdofs = 0;
8318: nisneu = 0;
8319: nisvert = 0;
8320: isarray = NULL;
8321: }
8322: /* destroy no longer needed map */
8323: ISLocalToGlobalMappingDestroy(&coarse_islg);
8325: /* subassemble */
8326: if (multilevel_allowed) {
8327: Vec vp[1];
8328: PetscInt nvecs = 0;
8329: PetscBool reuse,reuser;
8331: if (coarse_mat) reuse = PETSC_TRUE;
8332: else reuse = PETSC_FALSE;
8333: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8334: vp[0] = NULL;
8335: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8336: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8337: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8338: VecSetType(vp[0],VECSTANDARD);
8339: nvecs = 1;
8341: if (pcbddc->divudotp) {
8342: Mat B,loc_divudotp;
8343: Vec v,p;
8344: IS dummy;
8345: PetscInt np;
8347: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8348: MatGetSize(loc_divudotp,&np,NULL);
8349: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8350: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8351: MatCreateVecs(B,&v,&p);
8352: VecSet(p,1.);
8353: MatMultTranspose(B,p,v);
8354: VecDestroy(&p);
8355: MatDestroy(&B);
8356: VecGetArray(vp[0],&array);
8357: VecPlaceArray(pcbddc->vec1_P,array);
8358: VecRestoreArray(vp[0],&array);
8359: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8360: VecResetArray(pcbddc->vec1_P);
8361: ISDestroy(&dummy);
8362: VecDestroy(&v);
8363: }
8364: }
8365: if (reuser) {
8366: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8367: } else {
8368: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8369: }
8370: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8371: PetscScalar *arraym;
8372: const PetscScalar *arrayv;
8373: PetscInt nl;
8374: VecGetLocalSize(vp[0],&nl);
8375: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8376: MatDenseGetArray(coarsedivudotp,&arraym);
8377: VecGetArrayRead(vp[0],&arrayv);
8378: PetscArraycpy(arraym,arrayv,nl);
8379: VecRestoreArrayRead(vp[0],&arrayv);
8380: MatDenseRestoreArray(coarsedivudotp,&arraym);
8381: VecDestroy(&vp[0]);
8382: } else {
8383: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8384: }
8385: } else {
8386: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8387: }
8388: if (coarse_mat_is || coarse_mat) {
8389: if (!multilevel_allowed) {
8390: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8391: } else {
8392: /* if this matrix is present, it means we are not reusing the coarse matrix */
8393: if (coarse_mat_is) {
8394: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8395: PetscObjectReference((PetscObject)coarse_mat_is);
8396: coarse_mat = coarse_mat_is;
8397: }
8398: }
8399: }
8400: MatDestroy(&t_coarse_mat_is);
8401: MatDestroy(&coarse_mat_is);
8403: /* create local to global scatters for coarse problem */
8404: if (compute_vecs) {
8405: PetscInt lrows;
8406: VecDestroy(&pcbddc->coarse_vec);
8407: if (coarse_mat) {
8408: MatGetLocalSize(coarse_mat,&lrows,NULL);
8409: } else {
8410: lrows = 0;
8411: }
8412: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8413: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8414: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8415: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8416: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8417: }
8418: ISDestroy(&coarse_is);
8420: /* set defaults for coarse KSP and PC */
8421: if (multilevel_allowed) {
8422: coarse_ksp_type = KSPRICHARDSON;
8423: coarse_pc_type = PCBDDC;
8424: } else {
8425: coarse_ksp_type = KSPPREONLY;
8426: coarse_pc_type = PCREDUNDANT;
8427: }
8429: /* print some info if requested */
8430: if (pcbddc->dbg_flag) {
8431: if (!multilevel_allowed) {
8432: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8433: if (multilevel_requested) {
8434: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8435: } else if (pcbddc->max_levels) {
8436: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8437: }
8438: PetscViewerFlush(pcbddc->dbg_viewer);
8439: }
8440: }
8442: /* communicate coarse discrete gradient */
8443: coarseG = NULL;
8444: if (pcbddc->nedcG && multilevel_allowed) {
8445: MPI_Comm ccomm;
8446: if (coarse_mat) {
8447: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8448: } else {
8449: ccomm = MPI_COMM_NULL;
8450: }
8451: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8452: }
8454: /* create the coarse KSP object only once with defaults */
8455: if (coarse_mat) {
8456: PetscBool isredundant,isbddc,force,valid;
8457: PetscViewer dbg_viewer = NULL;
8459: if (pcbddc->dbg_flag) {
8460: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8461: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8462: }
8463: if (!pcbddc->coarse_ksp) {
8464: char prefix[256],str_level[16];
8465: size_t len;
8467: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8468: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8469: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8470: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8471: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8472: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8473: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8474: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8475: /* TODO is this logic correct? should check for coarse_mat type */
8476: PCSetType(pc_temp,coarse_pc_type);
8477: /* prefix */
8478: PetscStrcpy(prefix,"");
8479: PetscStrcpy(str_level,"");
8480: if (!pcbddc->current_level) {
8481: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8482: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8483: } else {
8484: PetscStrlen(((PetscObject)pc)->prefix,&len);
8485: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8486: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8487: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8488: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8489: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8490: PetscStrlcat(prefix,str_level,sizeof(prefix));
8491: }
8492: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8493: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8494: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8495: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8496: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8497: /* allow user customization */
8498: KSPSetFromOptions(pcbddc->coarse_ksp);
8499: /* get some info after set from options */
8500: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8501: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8502: force = PETSC_FALSE;
8503: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8504: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8505: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8506: if (multilevel_allowed && !force && !valid) {
8507: isbddc = PETSC_TRUE;
8508: PCSetType(pc_temp,PCBDDC);
8509: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8510: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8511: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8512: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8513: PetscObjectOptionsBegin((PetscObject)pc_temp);
8514: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8515: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8516: PetscOptionsEnd();
8517: pc_temp->setfromoptionscalled++;
8518: }
8519: }
8520: }
8521: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8522: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8523: if (nisdofs) {
8524: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8525: for (i=0;i<nisdofs;i++) {
8526: ISDestroy(&isarray[i]);
8527: }
8528: }
8529: if (nisneu) {
8530: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8531: ISDestroy(&isarray[nisdofs]);
8532: }
8533: if (nisvert) {
8534: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8535: ISDestroy(&isarray[nis-1]);
8536: }
8537: if (coarseG) {
8538: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8539: }
8541: /* get some info after set from options */
8542: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8544: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8545: if (isbddc && !multilevel_allowed) {
8546: PCSetType(pc_temp,coarse_pc_type);
8547: }
8548: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8549: force = PETSC_FALSE;
8550: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8551: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8552: if (multilevel_requested && multilevel_allowed && !valid && !force) {
8553: PCSetType(pc_temp,PCBDDC);
8554: }
8555: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8556: if (isredundant) {
8557: KSP inner_ksp;
8558: PC inner_pc;
8560: PCRedundantGetKSP(pc_temp,&inner_ksp);
8561: KSPGetPC(inner_ksp,&inner_pc);
8562: }
8564: /* parameters which miss an API */
8565: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8566: if (isbddc) {
8567: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8569: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8570: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8571: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8572: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8573: if (pcbddc_coarse->benign_saddle_point) {
8574: Mat coarsedivudotp_is;
8575: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8576: IS row,col;
8577: const PetscInt *gidxs;
8578: PetscInt n,st,M,N;
8580: MatGetSize(coarsedivudotp,&n,NULL);
8581: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8582: st = st-n;
8583: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8584: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8585: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8586: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8587: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8588: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8589: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8590: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8591: ISGetSize(row,&M);
8592: MatGetSize(coarse_mat,&N,NULL);
8593: ISDestroy(&row);
8594: ISDestroy(&col);
8595: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8596: MatSetType(coarsedivudotp_is,MATIS);
8597: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8598: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8599: ISLocalToGlobalMappingDestroy(&rl2g);
8600: ISLocalToGlobalMappingDestroy(&cl2g);
8601: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8602: MatDestroy(&coarsedivudotp);
8603: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8604: MatDestroy(&coarsedivudotp_is);
8605: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8606: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8607: }
8608: }
8610: /* propagate symmetry info of coarse matrix */
8611: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8612: if (pc->pmat->symmetric_set) {
8613: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8614: }
8615: if (pc->pmat->hermitian_set) {
8616: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8617: }
8618: if (pc->pmat->spd_set) {
8619: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8620: }
8621: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8622: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8623: }
8624: /* set operators */
8625: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8626: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8627: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8628: if (pcbddc->dbg_flag) {
8629: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8630: }
8631: }
8632: MatDestroy(&coarseG);
8633: PetscFree(isarray);
8634: #if 0
8635: {
8636: PetscViewer viewer;
8637: char filename[256];
8638: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8639: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8640: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8641: MatView(coarse_mat,viewer);
8642: PetscViewerPopFormat(viewer);
8643: PetscViewerDestroy(&viewer);
8644: }
8645: #endif
8647: if (corners) {
8648: Vec gv;
8649: IS is;
8650: const PetscInt *idxs;
8651: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8652: PetscScalar *coords;
8654: if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8655: VecGetSize(pcbddc->coarse_vec,&N);
8656: VecGetLocalSize(pcbddc->coarse_vec,&n);
8657: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8658: VecSetBlockSize(gv,cdim);
8659: VecSetSizes(gv,n*cdim,N*cdim);
8660: VecSetType(gv,VECSTANDARD);
8661: VecSetFromOptions(gv);
8662: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8664: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8665: ISGetLocalSize(is,&n);
8666: ISGetIndices(is,&idxs);
8667: PetscMalloc1(n*cdim,&coords);
8668: for (i=0;i<n;i++) {
8669: for (d=0;d<cdim;d++) {
8670: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8671: }
8672: }
8673: ISRestoreIndices(is,&idxs);
8674: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8676: ISGetLocalSize(corners,&n);
8677: ISGetIndices(corners,&idxs);
8678: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8679: ISRestoreIndices(corners,&idxs);
8680: PetscFree(coords);
8681: VecAssemblyBegin(gv);
8682: VecAssemblyEnd(gv);
8683: VecGetArray(gv,&coords);
8684: if (pcbddc->coarse_ksp) {
8685: PC coarse_pc;
8686: PetscBool isbddc;
8688: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8689: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8690: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8691: PetscReal *realcoords;
8693: VecGetLocalSize(gv,&n);
8694: #if defined(PETSC_USE_COMPLEX)
8695: PetscMalloc1(n,&realcoords);
8696: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8697: #else
8698: realcoords = coords;
8699: #endif
8700: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8701: #if defined(PETSC_USE_COMPLEX)
8702: PetscFree(realcoords);
8703: #endif
8704: }
8705: }
8706: VecRestoreArray(gv,&coords);
8707: VecDestroy(&gv);
8708: }
8709: ISDestroy(&corners);
8711: if (pcbddc->coarse_ksp) {
8712: Vec crhs,csol;
8714: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8715: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8716: if (!csol) {
8717: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8718: }
8719: if (!crhs) {
8720: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8721: }
8722: }
8723: MatDestroy(&coarsedivudotp);
8725: /* compute null space for coarse solver if the benign trick has been requested */
8726: if (pcbddc->benign_null) {
8728: VecSet(pcbddc->vec1_P,0.);
8729: for (i=0;i<pcbddc->benign_n;i++) {
8730: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8731: }
8732: VecAssemblyBegin(pcbddc->vec1_P);
8733: VecAssemblyEnd(pcbddc->vec1_P);
8734: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8735: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8736: if (coarse_mat) {
8737: Vec nullv;
8738: PetscScalar *array,*array2;
8739: PetscInt nl;
8741: MatCreateVecs(coarse_mat,&nullv,NULL);
8742: VecGetLocalSize(nullv,&nl);
8743: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8744: VecGetArray(nullv,&array2);
8745: PetscArraycpy(array2,array,nl);
8746: VecRestoreArray(nullv,&array2);
8747: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8748: VecNormalize(nullv,NULL);
8749: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8750: VecDestroy(&nullv);
8751: }
8752: }
8753: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8755: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8756: if (pcbddc->coarse_ksp) {
8757: PetscBool ispreonly;
8759: if (CoarseNullSpace) {
8760: PetscBool isnull;
8761: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8762: if (isnull) {
8763: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8764: }
8765: /* TODO: add local nullspaces (if any) */
8766: }
8767: /* setup coarse ksp */
8768: KSPSetUp(pcbddc->coarse_ksp);
8769: /* Check coarse problem if in debug mode or if solving with an iterative method */
8770: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8771: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8772: KSP check_ksp;
8773: KSPType check_ksp_type;
8774: PC check_pc;
8775: Vec check_vec,coarse_vec;
8776: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8777: PetscInt its;
8778: PetscBool compute_eigs;
8779: PetscReal *eigs_r,*eigs_c;
8780: PetscInt neigs;
8781: const char *prefix;
8783: /* Create ksp object suitable for estimation of extreme eigenvalues */
8784: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8785: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8786: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8787: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8788: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8789: /* prevent from setup unneeded object */
8790: KSPGetPC(check_ksp,&check_pc);
8791: PCSetType(check_pc,PCNONE);
8792: if (ispreonly) {
8793: check_ksp_type = KSPPREONLY;
8794: compute_eigs = PETSC_FALSE;
8795: } else {
8796: check_ksp_type = KSPGMRES;
8797: compute_eigs = PETSC_TRUE;
8798: }
8799: KSPSetType(check_ksp,check_ksp_type);
8800: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8801: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8802: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8803: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8804: KSPSetOptionsPrefix(check_ksp,prefix);
8805: KSPAppendOptionsPrefix(check_ksp,"check_");
8806: KSPSetFromOptions(check_ksp);
8807: KSPSetUp(check_ksp);
8808: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8809: KSPSetPC(check_ksp,check_pc);
8810: /* create random vec */
8811: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8812: VecSetRandom(check_vec,NULL);
8813: MatMult(coarse_mat,check_vec,coarse_vec);
8814: /* solve coarse problem */
8815: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8816: KSPCheckSolve(check_ksp,pc,coarse_vec);
8817: /* set eigenvalue estimation if preonly has not been requested */
8818: if (compute_eigs) {
8819: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8820: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8821: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8822: if (neigs) {
8823: lambda_max = eigs_r[neigs-1];
8824: lambda_min = eigs_r[0];
8825: if (pcbddc->use_coarse_estimates) {
8826: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8827: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8828: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8829: }
8830: }
8831: }
8832: }
8834: /* check coarse problem residual error */
8835: if (pcbddc->dbg_flag) {
8836: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8837: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8838: VecAXPY(check_vec,-1.0,coarse_vec);
8839: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8840: MatMult(coarse_mat,check_vec,coarse_vec);
8841: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8842: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8843: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8844: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8845: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8846: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8847: if (CoarseNullSpace) {
8848: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8849: }
8850: if (compute_eigs) {
8851: PetscReal lambda_max_s,lambda_min_s;
8852: KSPConvergedReason reason;
8853: KSPGetType(check_ksp,&check_ksp_type);
8854: KSPGetIterationNumber(check_ksp,&its);
8855: KSPGetConvergedReason(check_ksp,&reason);
8856: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8857: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8858: for (i=0;i<neigs;i++) {
8859: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8860: }
8861: }
8862: PetscViewerFlush(dbg_viewer);
8863: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8864: }
8865: VecDestroy(&check_vec);
8866: VecDestroy(&coarse_vec);
8867: KSPDestroy(&check_ksp);
8868: if (compute_eigs) {
8869: PetscFree(eigs_r);
8870: PetscFree(eigs_c);
8871: }
8872: }
8873: }
8874: MatNullSpaceDestroy(&CoarseNullSpace);
8875: /* print additional info */
8876: if (pcbddc->dbg_flag) {
8877: /* waits until all processes reaches this point */
8878: PetscBarrier((PetscObject)pc);
8879: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8880: PetscViewerFlush(pcbddc->dbg_viewer);
8881: }
8883: /* free memory */
8884: MatDestroy(&coarse_mat);
8885: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8886: return(0);
8887: }
8889: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8890: {
8891: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8892: PC_IS* pcis = (PC_IS*)pc->data;
8893: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8894: IS subset,subset_mult,subset_n;
8895: PetscInt local_size,coarse_size=0;
8896: PetscInt *local_primal_indices=NULL;
8897: const PetscInt *t_local_primal_indices;
8901: /* Compute global number of coarse dofs */
8902: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8903: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8904: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8905: ISDestroy(&subset_n);
8906: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8907: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8908: ISDestroy(&subset);
8909: ISDestroy(&subset_mult);
8910: ISGetLocalSize(subset_n,&local_size);
8911: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8912: PetscMalloc1(local_size,&local_primal_indices);
8913: ISGetIndices(subset_n,&t_local_primal_indices);
8914: PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8915: ISRestoreIndices(subset_n,&t_local_primal_indices);
8916: ISDestroy(&subset_n);
8918: /* check numbering */
8919: if (pcbddc->dbg_flag) {
8920: PetscScalar coarsesum,*array,*array2;
8921: PetscInt i;
8922: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8924: PetscViewerFlush(pcbddc->dbg_viewer);
8925: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8926: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8927: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8928: /* counter */
8929: VecSet(pcis->vec1_global,0.0);
8930: VecSet(pcis->vec1_N,1.0);
8931: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8932: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8933: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8934: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8935: VecSet(pcis->vec1_N,0.0);
8936: for (i=0;i<pcbddc->local_primal_size;i++) {
8937: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8938: }
8939: VecAssemblyBegin(pcis->vec1_N);
8940: VecAssemblyEnd(pcis->vec1_N);
8941: VecSet(pcis->vec1_global,0.0);
8942: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8943: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8944: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8945: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8946: VecGetArray(pcis->vec1_N,&array);
8947: VecGetArray(pcis->vec2_N,&array2);
8948: for (i=0;i<pcis->n;i++) {
8949: if (array[i] != 0.0 && array[i] != array2[i]) {
8950: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8951: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8952: set_error = PETSC_TRUE;
8953: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8954: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8955: }
8956: }
8957: VecRestoreArray(pcis->vec2_N,&array2);
8958: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8959: PetscViewerFlush(pcbddc->dbg_viewer);
8960: for (i=0;i<pcis->n;i++) {
8961: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8962: }
8963: VecRestoreArray(pcis->vec1_N,&array);
8964: VecSet(pcis->vec1_global,0.0);
8965: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8966: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8967: VecSum(pcis->vec1_global,&coarsesum);
8968: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8969: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8970: PetscInt *gidxs;
8972: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8973: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8974: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8975: PetscViewerFlush(pcbddc->dbg_viewer);
8976: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8977: for (i=0;i<pcbddc->local_primal_size;i++) {
8978: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8979: }
8980: PetscViewerFlush(pcbddc->dbg_viewer);
8981: PetscFree(gidxs);
8982: }
8983: PetscViewerFlush(pcbddc->dbg_viewer);
8984: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8985: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8986: }
8988: /* get back data */
8989: *coarse_size_n = coarse_size;
8990: *local_primal_indices_n = local_primal_indices;
8991: return(0);
8992: }
8994: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8995: {
8996: IS localis_t;
8997: PetscInt i,lsize,*idxs,n;
8998: PetscScalar *vals;
9002: /* get indices in local ordering exploiting local to global map */
9003: ISGetLocalSize(globalis,&lsize);
9004: PetscMalloc1(lsize,&vals);
9005: for (i=0;i<lsize;i++) vals[i] = 1.0;
9006: ISGetIndices(globalis,(const PetscInt**)&idxs);
9007: VecSet(gwork,0.0);
9008: VecSet(lwork,0.0);
9009: if (idxs) { /* multilevel guard */
9010: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
9011: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
9012: }
9013: VecAssemblyBegin(gwork);
9014: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
9015: PetscFree(vals);
9016: VecAssemblyEnd(gwork);
9017: /* now compute set in local ordering */
9018: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9019: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9020: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
9021: VecGetSize(lwork,&n);
9022: for (i=0,lsize=0;i<n;i++) {
9023: if (PetscRealPart(vals[i]) > 0.5) {
9024: lsize++;
9025: }
9026: }
9027: PetscMalloc1(lsize,&idxs);
9028: for (i=0,lsize=0;i<n;i++) {
9029: if (PetscRealPart(vals[i]) > 0.5) {
9030: idxs[lsize++] = i;
9031: }
9032: }
9033: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
9034: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
9035: *localis = localis_t;
9036: return(0);
9037: }
9039: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9040: {
9041: PC_IS *pcis=(PC_IS*)pc->data;
9042: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9043: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
9044: Mat S_j;
9045: PetscInt *used_xadj,*used_adjncy;
9046: PetscBool free_used_adj;
9047: PetscErrorCode ierr;
9050: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9051: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9052: free_used_adj = PETSC_FALSE;
9053: if (pcbddc->sub_schurs_layers == -1) {
9054: used_xadj = NULL;
9055: used_adjncy = NULL;
9056: } else {
9057: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9058: used_xadj = pcbddc->mat_graph->xadj;
9059: used_adjncy = pcbddc->mat_graph->adjncy;
9060: } else if (pcbddc->computed_rowadj) {
9061: used_xadj = pcbddc->mat_graph->xadj;
9062: used_adjncy = pcbddc->mat_graph->adjncy;
9063: } else {
9064: PetscBool flg_row=PETSC_FALSE;
9065: const PetscInt *xadj,*adjncy;
9066: PetscInt nvtxs;
9068: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9069: if (flg_row) {
9070: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9071: PetscArraycpy(used_xadj,xadj,nvtxs+1);
9072: PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9073: free_used_adj = PETSC_TRUE;
9074: } else {
9075: pcbddc->sub_schurs_layers = -1;
9076: used_xadj = NULL;
9077: used_adjncy = NULL;
9078: }
9079: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9080: }
9081: }
9083: /* setup sub_schurs data */
9084: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9085: if (!sub_schurs->schur_explicit) {
9086: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9087: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9088: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9089: } else {
9090: Mat change = NULL;
9091: Vec scaling = NULL;
9092: IS change_primal = NULL, iP;
9093: PetscInt benign_n;
9094: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9095: PetscBool need_change = PETSC_FALSE;
9096: PetscBool discrete_harmonic = PETSC_FALSE;
9098: if (!pcbddc->use_vertices && reuse_solvers) {
9099: PetscInt n_vertices;
9101: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9102: reuse_solvers = (PetscBool)!n_vertices;
9103: }
9104: if (!pcbddc->benign_change_explicit) {
9105: benign_n = pcbddc->benign_n;
9106: } else {
9107: benign_n = 0;
9108: }
9109: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9110: We need a global reduction to avoid possible deadlocks.
9111: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9112: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9113: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9114: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9115: need_change = (PetscBool)(!need_change);
9116: }
9117: /* If the user defines additional constraints, we import them here.
9118: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9119: if (need_change) {
9120: PC_IS *pcisf;
9121: PC_BDDC *pcbddcf;
9122: PC pcf;
9124: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9125: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9126: PCSetOperators(pcf,pc->mat,pc->pmat);
9127: PCSetType(pcf,PCBDDC);
9129: /* hacks */
9130: pcisf = (PC_IS*)pcf->data;
9131: pcisf->is_B_local = pcis->is_B_local;
9132: pcisf->vec1_N = pcis->vec1_N;
9133: pcisf->BtoNmap = pcis->BtoNmap;
9134: pcisf->n = pcis->n;
9135: pcisf->n_B = pcis->n_B;
9136: pcbddcf = (PC_BDDC*)pcf->data;
9137: PetscFree(pcbddcf->mat_graph);
9138: pcbddcf->mat_graph = pcbddc->mat_graph;
9139: pcbddcf->use_faces = PETSC_TRUE;
9140: pcbddcf->use_change_of_basis = PETSC_TRUE;
9141: pcbddcf->use_change_on_faces = PETSC_TRUE;
9142: pcbddcf->use_qr_single = PETSC_TRUE;
9143: pcbddcf->fake_change = PETSC_TRUE;
9145: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9146: PCBDDCConstraintsSetUp(pcf);
9147: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9148: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9149: change = pcbddcf->ConstraintMatrix;
9150: pcbddcf->ConstraintMatrix = NULL;
9152: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9153: PetscFree(pcbddcf->sub_schurs);
9154: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9155: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9156: PetscFree(pcbddcf->primal_indices_local_idxs);
9157: PetscFree(pcbddcf->onearnullvecs_state);
9158: PetscFree(pcf->data);
9159: pcf->ops->destroy = NULL;
9160: pcf->ops->reset = NULL;
9161: PCDestroy(&pcf);
9162: }
9163: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9165: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9166: if (iP) {
9167: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9168: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9169: PetscOptionsEnd();
9170: }
9171: if (discrete_harmonic) {
9172: Mat A;
9173: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9174: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9175: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9176: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9177: MatDestroy(&A);
9178: } else {
9179: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9180: }
9181: MatDestroy(&change);
9182: ISDestroy(&change_primal);
9183: }
9184: MatDestroy(&S_j);
9186: /* free adjacency */
9187: if (free_used_adj) {
9188: PetscFree2(used_xadj,used_adjncy);
9189: }
9190: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9191: return(0);
9192: }
9194: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9195: {
9196: PC_IS *pcis=(PC_IS*)pc->data;
9197: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9198: PCBDDCGraph graph;
9199: PetscErrorCode ierr;
9202: /* attach interface graph for determining subsets */
9203: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9204: IS verticesIS,verticescomm;
9205: PetscInt vsize,*idxs;
9207: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9208: ISGetSize(verticesIS,&vsize);
9209: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9210: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9211: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9212: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9213: PCBDDCGraphCreate(&graph);
9214: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9215: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9216: ISDestroy(&verticescomm);
9217: PCBDDCGraphComputeConnectedComponents(graph);
9218: } else {
9219: graph = pcbddc->mat_graph;
9220: }
9221: /* print some info */
9222: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9223: IS vertices;
9224: PetscInt nv,nedges,nfaces;
9225: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9226: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9227: ISGetSize(vertices,&nv);
9228: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9229: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9230: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9231: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9232: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9233: PetscViewerFlush(pcbddc->dbg_viewer);
9234: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9235: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9236: }
9238: /* sub_schurs init */
9239: if (!pcbddc->sub_schurs) {
9240: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9241: }
9242: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9244: /* free graph struct */
9245: if (pcbddc->sub_schurs_rebuild) {
9246: PCBDDCGraphDestroy(&graph);
9247: }
9248: return(0);
9249: }
9251: PetscErrorCode PCBDDCCheckOperator(PC pc)
9252: {
9253: PC_IS *pcis=(PC_IS*)pc->data;
9254: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9255: PetscErrorCode ierr;
9258: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9259: IS zerodiag = NULL;
9260: Mat S_j,B0_B=NULL;
9261: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9262: PetscScalar *p0_check,*array,*array2;
9263: PetscReal norm;
9264: PetscInt i;
9266: /* B0 and B0_B */
9267: if (zerodiag) {
9268: IS dummy;
9270: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9271: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9272: MatCreateVecs(B0_B,NULL,&dummy_vec);
9273: ISDestroy(&dummy);
9274: }
9275: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9276: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9277: VecSet(pcbddc->vec1_P,1.0);
9278: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9279: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9280: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9281: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9282: VecReciprocal(vec_scale_P);
9283: /* S_j */
9284: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9285: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9287: /* mimic vector in \widetilde{W}_\Gamma */
9288: VecSetRandom(pcis->vec1_N,NULL);
9289: /* continuous in primal space */
9290: VecSetRandom(pcbddc->coarse_vec,NULL);
9291: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9292: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9293: VecGetArray(pcbddc->vec1_P,&array);
9294: PetscCalloc1(pcbddc->benign_n,&p0_check);
9295: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9296: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9297: VecRestoreArray(pcbddc->vec1_P,&array);
9298: VecAssemblyBegin(pcis->vec1_N);
9299: VecAssemblyEnd(pcis->vec1_N);
9300: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9301: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9302: VecDuplicate(pcis->vec2_B,&vec_check_B);
9303: VecCopy(pcis->vec2_B,vec_check_B);
9305: /* assemble rhs for coarse problem */
9306: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9307: /* local with Schur */
9308: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9309: if (zerodiag) {
9310: VecGetArray(dummy_vec,&array);
9311: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9312: VecRestoreArray(dummy_vec,&array);
9313: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9314: }
9315: /* sum on primal nodes the local contributions */
9316: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9317: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9318: VecGetArray(pcis->vec1_N,&array);
9319: VecGetArray(pcbddc->vec1_P,&array2);
9320: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9321: VecRestoreArray(pcbddc->vec1_P,&array2);
9322: VecRestoreArray(pcis->vec1_N,&array);
9323: VecSet(pcbddc->coarse_vec,0.);
9324: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9325: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9326: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9327: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9328: VecGetArray(pcbddc->vec1_P,&array);
9329: /* scale primal nodes (BDDC sums contibutions) */
9330: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9331: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9332: VecRestoreArray(pcbddc->vec1_P,&array);
9333: VecAssemblyBegin(pcis->vec1_N);
9334: VecAssemblyEnd(pcis->vec1_N);
9335: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9336: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9337: /* global: \widetilde{B0}_B w_\Gamma */
9338: if (zerodiag) {
9339: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9340: VecGetArray(dummy_vec,&array);
9341: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9342: VecRestoreArray(dummy_vec,&array);
9343: }
9344: /* BDDC */
9345: VecSet(pcis->vec1_D,0.);
9346: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9348: VecCopy(pcis->vec1_B,pcis->vec2_B);
9349: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9350: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9351: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9352: for (i=0;i<pcbddc->benign_n;i++) {
9353: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9354: }
9355: PetscFree(p0_check);
9356: VecDestroy(&vec_scale_P);
9357: VecDestroy(&vec_check_B);
9358: VecDestroy(&dummy_vec);
9359: MatDestroy(&S_j);
9360: MatDestroy(&B0_B);
9361: }
9362: return(0);
9363: }
9365: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9366: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9367: {
9368: Mat At;
9369: IS rows;
9370: PetscInt rst,ren;
9372: PetscLayout rmap;
9375: rst = ren = 0;
9376: if (ccomm != MPI_COMM_NULL) {
9377: PetscLayoutCreate(ccomm,&rmap);
9378: PetscLayoutSetSize(rmap,A->rmap->N);
9379: PetscLayoutSetBlockSize(rmap,1);
9380: PetscLayoutSetUp(rmap);
9381: PetscLayoutGetRange(rmap,&rst,&ren);
9382: }
9383: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9384: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9385: ISDestroy(&rows);
9387: if (ccomm != MPI_COMM_NULL) {
9388: Mat_MPIAIJ *a,*b;
9389: IS from,to;
9390: Vec gvec;
9391: PetscInt lsize;
9393: MatCreate(ccomm,B);
9394: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9395: MatSetType(*B,MATAIJ);
9396: PetscLayoutDestroy(&((*B)->rmap));
9397: PetscLayoutSetUp((*B)->cmap);
9398: a = (Mat_MPIAIJ*)At->data;
9399: b = (Mat_MPIAIJ*)(*B)->data;
9400: MPI_Comm_size(ccomm,&b->size);
9401: MPI_Comm_rank(ccomm,&b->rank);
9402: PetscObjectReference((PetscObject)a->A);
9403: PetscObjectReference((PetscObject)a->B);
9404: b->A = a->A;
9405: b->B = a->B;
9407: b->donotstash = a->donotstash;
9408: b->roworiented = a->roworiented;
9409: b->rowindices = 0;
9410: b->rowvalues = 0;
9411: b->getrowactive = PETSC_FALSE;
9413: (*B)->rmap = rmap;
9414: (*B)->factortype = A->factortype;
9415: (*B)->assembled = PETSC_TRUE;
9416: (*B)->insertmode = NOT_SET_VALUES;
9417: (*B)->preallocated = PETSC_TRUE;
9419: if (a->colmap) {
9420: #if defined(PETSC_USE_CTABLE)
9421: PetscTableCreateCopy(a->colmap,&b->colmap);
9422: #else
9423: PetscMalloc1(At->cmap->N,&b->colmap);
9424: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9425: PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9426: #endif
9427: } else b->colmap = 0;
9428: if (a->garray) {
9429: PetscInt len;
9430: len = a->B->cmap->n;
9431: PetscMalloc1(len+1,&b->garray);
9432: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9433: if (len) { PetscArraycpy(b->garray,a->garray,len); }
9434: } else b->garray = 0;
9436: PetscObjectReference((PetscObject)a->lvec);
9437: b->lvec = a->lvec;
9438: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9440: /* cannot use VecScatterCopy */
9441: VecGetLocalSize(b->lvec,&lsize);
9442: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9443: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9444: MatCreateVecs(*B,&gvec,NULL);
9445: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9446: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9447: ISDestroy(&from);
9448: ISDestroy(&to);
9449: VecDestroy(&gvec);
9450: }
9451: MatDestroy(&At);
9452: return(0);
9453: }