Actual source code: bddcprivate.c
petsc-3.9.4 2018-09-11
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1;
21: PetscInt ulw,i,nr,nc,n;
25: #if defined(PETSC_MISSING_LAPACK_GESVD)
26: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
27: #else
28: MatGetSize(A,&nr,&nc);
29: if (!nr || !nc) return(0);
31: /* workspace */
32: if (!work) {
33: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
34: PetscMalloc1(ulw,&uwork);
35: } else {
36: ulw = lw;
37: uwork = work;
38: }
39: n = PetscMin(nr,nc);
40: if (!rwork) {
41: PetscMalloc1(n,&sing);
42: } else {
43: sing = rwork;
44: }
46: /* SVD */
47: PetscMalloc1(nr*nr,&U);
48: PetscBLASIntCast(nr,&bM);
49: PetscBLASIntCast(nc,&bN);
50: PetscBLASIntCast(ulw,&lwork);
51: MatDenseGetArray(A,&data);
52: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
53: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
54: PetscFPTrapPop();
55: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
56: MatDenseRestoreArray(A,&data);
57: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
58: if (!rwork) {
59: PetscFree(sing);
60: }
61: if (!work) {
62: PetscFree(uwork);
63: }
64: /* create B */
65: if (!range) {
66: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
67: MatDenseGetArray(*B,&data);
68: PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
69: } else {
70: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
71: MatDenseGetArray(*B,&data);
72: PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
73: }
74: MatDenseRestoreArray(*B,&data);
75: PetscFree(U);
76: #endif
77: #else /* PETSC_USE_COMPLEX */
79: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
80: #endif
81: return(0);
82: }
84: /* TODO REMOVE */
85: #if defined(PRINT_GDET)
86: static int inc = 0;
87: static int lev = 0;
88: #endif
90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
91: {
93: Mat GE,GEd;
94: PetscInt rsize,csize,esize;
95: PetscScalar *ptr;
98: ISGetSize(edge,&esize);
99: if (!esize) return(0);
100: ISGetSize(extrow,&rsize);
101: ISGetSize(extcol,&csize);
103: /* gradients */
104: ptr = work + 5*esize;
105: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108: MatDestroy(&GE);
110: /* constants */
111: ptr += rsize*csize;
112: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115: MatDestroy(&GE);
116: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117: MatDestroy(&GEd);
119: if (corners) {
120: Mat GEc;
121: PetscScalar *vals,v;
123: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125: MatDenseGetArray(GEd,&vals);
126: /* v = PetscAbsScalar(vals[0]) */;
127: v = 1.;
128: cvals[0] = vals[0]/v;
129: cvals[1] = vals[1]/v;
130: MatDenseRestoreArray(GEd,&vals);
131: MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133: {
134: PetscViewer viewer;
135: char filename[256];
136: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139: PetscObjectSetName((PetscObject)GEc,"GEc");
140: MatView(GEc,viewer);
141: PetscObjectSetName((PetscObject)(*GKins),"GK");
142: MatView(*GKins,viewer);
143: PetscObjectSetName((PetscObject)GEd,"Gproj");
144: MatView(GEd,viewer);
145: PetscViewerDestroy(&viewer);
146: }
147: #endif
148: MatDestroy(&GEd);
149: MatDestroy(&GEc);
150: }
152: return(0);
153: }
155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
158: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
159: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160: Vec tvec;
161: PetscSF sfv;
162: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163: MPI_Comm comm;
164: IS lned,primals,allprimals,nedfieldlocal;
165: IS *eedges,*extrows,*extcols,*alleedges;
166: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167: PetscScalar *vals,*work;
168: PetscReal *rwork;
169: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
170: PetscInt ne,nv,Lv,order,n,field;
171: PetscInt n_neigh,*neigh,*n_shared,**shared;
172: PetscInt i,j,extmem,cum,maxsize,nee;
173: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174: PetscInt *sfvleaves,*sfvroots;
175: PetscInt *corners,*cedges;
176: PetscInt *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178: PetscInt *emarks;
179: #endif
180: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181: PetscErrorCode ierr;
184: /* If the discrete gradient is defined for a subset of dofs and global is true,
185: it assumes G is given in global ordering for all the dofs.
186: Otherwise, the ordering is global for the Nedelec field */
187: order = pcbddc->nedorder;
188: conforming = pcbddc->conforming;
189: field = pcbddc->nedfield;
190: global = pcbddc->nedglobal;
191: setprimal = PETSC_FALSE;
192: print = PETSC_FALSE;
193: singular = PETSC_FALSE;
195: /* Command line customization */
196: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200: /* print debug info TODO: to be removed */
201: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202: PetscOptionsEnd();
204: /* Return if there are no edges in the decomposition and the problem is not singular */
205: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206: ISLocalToGlobalMappingGetSize(al2g,&n);
207: PetscObjectGetComm((PetscObject)pc,&comm);
208: if (!singular) {
209: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210: lrc[0] = PETSC_FALSE;
211: for (i=0;i<n;i++) {
212: if (PetscRealPart(vals[i]) > 2.) {
213: lrc[0] = PETSC_TRUE;
214: break;
215: }
216: }
217: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219: if (!lrc[1]) return(0);
220: }
222: /* Get Nedelec field */
223: MatISSetUpSF(pc->pmat);
224: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal);
225: if (pcbddc->n_ISForDofsLocal && field >= 0) {
226: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227: nedfieldlocal = pcbddc->ISForDofsLocal[field];
228: ISGetLocalSize(nedfieldlocal,&ne);
229: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230: ne = n;
231: nedfieldlocal = NULL;
232: global = PETSC_TRUE;
233: } else if (field == PETSC_DECIDE) {
234: PetscInt rst,ren,*idx;
236: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
237: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
238: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239: for (i=rst;i<ren;i++) {
240: PetscInt nc;
242: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245: }
246: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248: PetscMalloc1(n,&idx);
249: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251: } else {
252: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253: }
255: /* Sanity checks */
256: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order);
260: /* Just set primal dofs and return */
261: if (setprimal) {
262: IS enedfieldlocal;
263: PetscInt *eidxs;
265: PetscMalloc1(ne,&eidxs);
266: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267: if (nedfieldlocal) {
268: ISGetIndices(nedfieldlocal,&idxs);
269: for (i=0,cum=0;i<ne;i++) {
270: if (PetscRealPart(vals[idxs[i]]) > 2.) {
271: eidxs[cum++] = idxs[i];
272: }
273: }
274: ISRestoreIndices(nedfieldlocal,&idxs);
275: } else {
276: for (i=0,cum=0;i<ne;i++) {
277: if (PetscRealPart(vals[i]) > 2.) {
278: eidxs[cum++] = i;
279: }
280: }
281: }
282: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285: PetscFree(eidxs);
286: ISDestroy(&nedfieldlocal);
287: ISDestroy(&enedfieldlocal);
288: return(0);
289: }
291: /* Compute some l2g maps */
292: if (nedfieldlocal) {
293: IS is;
295: /* need to map from the local Nedelec field to local numbering */
296: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299: ISLocalToGlobalMappingCreateIS(is,&al2g);
300: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301: if (global) {
302: PetscObjectReference((PetscObject)al2g);
303: el2g = al2g;
304: } else {
305: IS gis;
307: ISRenumber(is,NULL,NULL,&gis);
308: ISLocalToGlobalMappingCreateIS(gis,&el2g);
309: ISDestroy(&gis);
310: }
311: ISDestroy(&is);
312: } else {
313: /* restore default */
314: pcbddc->nedfield = -1;
315: /* one ref for the destruction of al2g, one for el2g */
316: PetscObjectReference((PetscObject)al2g);
317: PetscObjectReference((PetscObject)al2g);
318: el2g = al2g;
319: fl2g = NULL;
320: }
322: /* Start communication to drop connections for interior edges (for cc analysis only) */
323: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
324: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
325: if (nedfieldlocal) {
326: ISGetIndices(nedfieldlocal,&idxs);
327: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328: ISRestoreIndices(nedfieldlocal,&idxs);
329: } else {
330: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331: }
332: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
335: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338: if (global) {
339: PetscInt rst;
341: MatGetOwnershipRange(G,&rst,NULL);
342: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343: if (matis->sf_rootdata[i] < 2) {
344: matis->sf_rootdata[cum++] = i + rst;
345: }
346: }
347: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349: } else {
350: PetscInt *tbz;
352: PetscMalloc1(ne,&tbz);
353: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355: ISGetIndices(nedfieldlocal,&idxs);
356: for (i=0,cum=0;i<ne;i++)
357: if (matis->sf_leafdata[idxs[i]] == 1)
358: tbz[cum++] = i;
359: ISRestoreIndices(nedfieldlocal,&idxs);
360: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362: PetscFree(tbz);
363: }
364: } else { /* we need the entire G to infer the nullspace */
365: PetscObjectReference((PetscObject)pcbddc->discretegradient);
366: G = pcbddc->discretegradient;
367: }
369: /* Extract subdomain relevant rows of G */
370: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374: ISDestroy(&lned);
375: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376: MatDestroy(&lGall);
377: MatISGetLocalMat(lGis,&lG);
379: /* SF for nodal dofs communications */
380: MatGetLocalSize(G,NULL,&Lv);
381: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382: PetscObjectReference((PetscObject)vl2g);
383: ISLocalToGlobalMappingGetSize(vl2g,&nv);
384: PetscSFCreate(comm,&sfv);
385: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388: i = singular ? 2 : 1;
389: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
391: /* Destroy temporary G created in MATIS format and modified G */
392: PetscObjectReference((PetscObject)lG);
393: MatDestroy(&lGis);
394: MatDestroy(&G);
396: if (print) {
397: PetscObjectSetName((PetscObject)lG,"initial_lG");
398: MatView(lG,NULL);
399: }
401: /* Save lG for values insertion in change of basis */
402: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
404: /* Analyze the edge-nodes connections (duplicate lG) */
405: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407: PetscBTCreate(nv,&btv);
408: PetscBTCreate(ne,&bte);
409: PetscBTCreate(ne,&btb);
410: PetscBTCreate(ne,&btbd);
411: PetscBTCreate(nv,&btvcand);
412: /* need to import the boundary specification to ensure the
413: proper detection of coarse edges' endpoints */
414: if (pcbddc->DirichletBoundariesLocal) {
415: IS is;
417: if (fl2g) {
418: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419: } else {
420: is = pcbddc->DirichletBoundariesLocal;
421: }
422: ISGetLocalSize(is,&cum);
423: ISGetIndices(is,&idxs);
424: for (i=0;i<cum;i++) {
425: if (idxs[i] >= 0) {
426: PetscBTSet(btb,idxs[i]);
427: PetscBTSet(btbd,idxs[i]);
428: }
429: }
430: ISRestoreIndices(is,&idxs);
431: if (fl2g) {
432: ISDestroy(&is);
433: }
434: }
435: if (pcbddc->NeumannBoundariesLocal) {
436: IS is;
438: if (fl2g) {
439: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440: } else {
441: is = pcbddc->NeumannBoundariesLocal;
442: }
443: ISGetLocalSize(is,&cum);
444: ISGetIndices(is,&idxs);
445: for (i=0;i<cum;i++) {
446: if (idxs[i] >= 0) {
447: PetscBTSet(btb,idxs[i]);
448: }
449: }
450: ISRestoreIndices(is,&idxs);
451: if (fl2g) {
452: ISDestroy(&is);
453: }
454: }
456: /* Count neighs per dof */
457: PetscCalloc1(ne,&ecount);
458: PetscMalloc1(ne,&eneighs);
459: ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
460: for (i=1,cum=0;i<n_neigh;i++) {
461: cum += n_shared[i];
462: for (j=0;j<n_shared[i];j++) {
463: ecount[shared[i][j]]++;
464: }
465: }
466: if (ne) {
467: PetscMalloc1(cum,&eneighs[0]);
468: }
469: for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1];
470: PetscMemzero(ecount,ne*sizeof(PetscInt));
471: for (i=1;i<n_neigh;i++) {
472: for (j=0;j<n_shared[i];j++) {
473: PetscInt k = shared[i][j];
474: eneighs[k][ecount[k]] = neigh[i];
475: ecount[k]++;
476: }
477: }
478: for (i=0;i<ne;i++) {
479: PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);
480: }
481: ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);
482: PetscCalloc1(nv,&vcount);
483: PetscMalloc1(nv,&vneighs);
484: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
485: for (i=1,cum=0;i<n_neigh;i++) {
486: cum += n_shared[i];
487: for (j=0;j<n_shared[i];j++) {
488: vcount[shared[i][j]]++;
489: }
490: }
491: if (nv) {
492: PetscMalloc1(cum,&vneighs[0]);
493: }
494: for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1];
495: PetscMemzero(vcount,nv*sizeof(PetscInt));
496: for (i=1;i<n_neigh;i++) {
497: for (j=0;j<n_shared[i];j++) {
498: PetscInt k = shared[i][j];
499: vneighs[k][vcount[k]] = neigh[i];
500: vcount[k]++;
501: }
502: }
503: for (i=0;i<nv;i++) {
504: PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);
505: }
506: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
508: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
509: for proper detection of coarse edges' endpoints */
510: PetscBTCreate(ne,&btee);
511: for (i=0;i<ne;i++) {
512: if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) {
513: PetscBTSet(btee,i);
514: }
515: }
516: PetscMalloc1(ne,&marks);
517: if (!conforming) {
518: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
519: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
520: }
521: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
522: MatSeqAIJGetArray(lGe,&vals);
523: cum = 0;
524: for (i=0;i<ne;i++) {
525: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
526: if (!PetscBTLookup(btee,i)) {
527: marks[cum++] = i;
528: continue;
529: }
530: /* set badly connected edge dofs as primal */
531: if (!conforming) {
532: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
533: marks[cum++] = i;
534: PetscBTSet(bte,i);
535: for (j=ii[i];j<ii[i+1];j++) {
536: PetscBTSet(btv,jj[j]);
537: }
538: } else {
539: /* every edge dofs should be connected trough a certain number of nodal dofs
540: to other edge dofs belonging to coarse edges
541: - at most 2 endpoints
542: - order-1 interior nodal dofs
543: - no undefined nodal dofs (nconn < order)
544: */
545: PetscInt ends = 0,ints = 0, undef = 0;
546: for (j=ii[i];j<ii[i+1];j++) {
547: PetscInt v = jj[j],k;
548: PetscInt nconn = iit[v+1]-iit[v];
549: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
550: if (nconn > order) ends++;
551: else if (nconn == order) ints++;
552: else undef++;
553: }
554: if (undef || ends > 2 || ints != order -1) {
555: marks[cum++] = i;
556: PetscBTSet(bte,i);
557: for (j=ii[i];j<ii[i+1];j++) {
558: PetscBTSet(btv,jj[j]);
559: }
560: }
561: }
562: }
563: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
564: if (!order && ii[i+1] != ii[i]) {
565: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
566: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
567: }
568: }
569: PetscBTDestroy(&btee);
570: MatSeqAIJRestoreArray(lGe,&vals);
571: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
572: if (!conforming) {
573: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
574: MatDestroy(&lGt);
575: }
576: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
578: /* identify splitpoints and corner candidates */
579: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
580: if (print) {
581: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
582: MatView(lGe,NULL);
583: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
584: MatView(lGt,NULL);
585: }
586: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
587: MatSeqAIJGetArray(lGt,&vals);
588: for (i=0;i<nv;i++) {
589: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
590: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
591: if (!order) { /* variable order */
592: PetscReal vorder = 0.;
594: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
595: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
596: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test);
597: ord = 1;
598: }
599: #if defined(PETSC_USE_DEBUG)
600: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord);
601: #endif
602: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
603: if (PetscBTLookup(btbd,jj[j])) {
604: bdir = PETSC_TRUE;
605: break;
606: }
607: if (vc != ecount[jj[j]]) {
608: sneighs = PETSC_FALSE;
609: } else {
610: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
611: for (k=0;k<vc;k++) {
612: if (vn[k] != en[k]) {
613: sneighs = PETSC_FALSE;
614: break;
615: }
616: }
617: }
618: }
619: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
620: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir);
621: PetscBTSet(btv,i);
622: } else if (test == ord) {
623: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
624: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i);
625: PetscBTSet(btv,i);
626: } else {
627: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i);
628: PetscBTSet(btvcand,i);
629: }
630: }
631: }
632: PetscFree(ecount);
633: PetscFree(vcount);
634: if (ne) {
635: PetscFree(eneighs[0]);
636: }
637: if (nv) {
638: PetscFree(vneighs[0]);
639: }
640: PetscFree(eneighs);
641: PetscFree(vneighs);
642: PetscBTDestroy(&btbd);
644: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
645: if (order != 1) {
646: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
647: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
648: for (i=0;i<nv;i++) {
649: if (PetscBTLookup(btvcand,i)) {
650: PetscBool found = PETSC_FALSE;
651: for (j=ii[i];j<ii[i+1] && !found;j++) {
652: PetscInt k,e = jj[j];
653: if (PetscBTLookup(bte,e)) continue;
654: for (k=iit[e];k<iit[e+1];k++) {
655: PetscInt v = jjt[k];
656: if (v != i && PetscBTLookup(btvcand,v)) {
657: found = PETSC_TRUE;
658: break;
659: }
660: }
661: }
662: if (!found) {
663: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i);
664: PetscBTClear(btvcand,i);
665: } else {
666: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i);
667: }
668: }
669: }
670: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
671: }
672: MatSeqAIJRestoreArray(lGt,&vals);
673: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
674: MatDestroy(&lGe);
676: /* Get the local G^T explicitly */
677: MatDestroy(&lGt);
678: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
679: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
681: /* Mark interior nodal dofs */
682: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
683: PetscBTCreate(nv,&btvi);
684: for (i=1;i<n_neigh;i++) {
685: for (j=0;j<n_shared[i];j++) {
686: PetscBTSet(btvi,shared[i][j]);
687: }
688: }
689: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
691: /* communicate corners and splitpoints */
692: PetscMalloc1(nv,&vmarks);
693: PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
694: PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
695: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
697: if (print) {
698: IS tbz;
700: cum = 0;
701: for (i=0;i<nv;i++)
702: if (sfvleaves[i])
703: vmarks[cum++] = i;
705: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
706: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
707: ISView(tbz,NULL);
708: ISDestroy(&tbz);
709: }
711: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
712: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
713: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
714: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
716: /* Zero rows of lGt corresponding to identified corners
717: and interior nodal dofs */
718: cum = 0;
719: for (i=0;i<nv;i++) {
720: if (sfvleaves[i]) {
721: vmarks[cum++] = i;
722: PetscBTSet(btv,i);
723: }
724: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
725: }
726: PetscBTDestroy(&btvi);
727: if (print) {
728: IS tbz;
730: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
731: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
732: ISView(tbz,NULL);
733: ISDestroy(&tbz);
734: }
735: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
736: PetscFree(vmarks);
737: PetscSFDestroy(&sfv);
738: PetscFree2(sfvleaves,sfvroots);
740: /* Recompute G */
741: MatDestroy(&lG);
742: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
743: if (print) {
744: PetscObjectSetName((PetscObject)lG,"used_lG");
745: MatView(lG,NULL);
746: PetscObjectSetName((PetscObject)lGt,"used_lGt");
747: MatView(lGt,NULL);
748: }
750: /* Get primal dofs (if any) */
751: cum = 0;
752: for (i=0;i<ne;i++) {
753: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
754: }
755: if (fl2g) {
756: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
757: }
758: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
759: if (print) {
760: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
761: ISView(primals,NULL);
762: }
763: PetscBTDestroy(&bte);
764: /* TODO: what if the user passed in some of them ? */
765: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
766: ISDestroy(&primals);
768: /* Compute edge connectivity */
769: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
770: MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
771: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
772: if (fl2g) {
773: PetscBT btf;
774: PetscInt *iia,*jja,*iiu,*jju;
775: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
777: /* create CSR for all local dofs */
778: PetscMalloc1(n+1,&iia);
779: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
780: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n);
781: iiu = pcbddc->mat_graph->xadj;
782: jju = pcbddc->mat_graph->adjncy;
783: } else if (pcbddc->use_local_adj) {
784: rest = PETSC_TRUE;
785: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
786: } else {
787: free = PETSC_TRUE;
788: PetscMalloc2(n+1,&iiu,n,&jju);
789: iiu[0] = 0;
790: for (i=0;i<n;i++) {
791: iiu[i+1] = i+1;
792: jju[i] = -1;
793: }
794: }
796: /* import sizes of CSR */
797: iia[0] = 0;
798: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
800: /* overwrite entries corresponding to the Nedelec field */
801: PetscBTCreate(n,&btf);
802: ISGetIndices(nedfieldlocal,&idxs);
803: for (i=0;i<ne;i++) {
804: PetscBTSet(btf,idxs[i]);
805: iia[idxs[i]+1] = ii[i+1]-ii[i];
806: }
808: /* iia in CSR */
809: for (i=0;i<n;i++) iia[i+1] += iia[i];
811: /* jja in CSR */
812: PetscMalloc1(iia[n],&jja);
813: for (i=0;i<n;i++)
814: if (!PetscBTLookup(btf,i))
815: for (j=0;j<iiu[i+1]-iiu[i];j++)
816: jja[iia[i]+j] = jju[iiu[i]+j];
818: /* map edge dofs connectivity */
819: if (jj) {
820: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
821: for (i=0;i<ne;i++) {
822: PetscInt e = idxs[i];
823: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
824: }
825: }
826: ISRestoreIndices(nedfieldlocal,&idxs);
827: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
828: if (rest) {
829: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
830: }
831: if (free) {
832: PetscFree2(iiu,jju);
833: }
834: PetscBTDestroy(&btf);
835: } else {
836: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
837: }
839: /* Analyze interface for edge dofs */
840: PCBDDCAnalyzeInterface(pc);
841: pcbddc->mat_graph->twodim = PETSC_FALSE;
843: /* Get coarse edges in the edge space */
844: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
845: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
847: if (fl2g) {
848: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
849: PetscMalloc1(nee,&eedges);
850: for (i=0;i<nee;i++) {
851: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
852: }
853: } else {
854: eedges = alleedges;
855: primals = allprimals;
856: }
858: /* Mark fine edge dofs with their coarse edge id */
859: PetscMemzero(marks,ne*sizeof(PetscInt));
860: ISGetLocalSize(primals,&cum);
861: ISGetIndices(primals,&idxs);
862: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
863: ISRestoreIndices(primals,&idxs);
864: if (print) {
865: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
866: ISView(primals,NULL);
867: }
869: maxsize = 0;
870: for (i=0;i<nee;i++) {
871: PetscInt size,mark = i+1;
873: ISGetLocalSize(eedges[i],&size);
874: ISGetIndices(eedges[i],&idxs);
875: for (j=0;j<size;j++) marks[idxs[j]] = mark;
876: ISRestoreIndices(eedges[i],&idxs);
877: maxsize = PetscMax(maxsize,size);
878: }
880: /* Find coarse edge endpoints */
881: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
882: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
883: for (i=0;i<nee;i++) {
884: PetscInt mark = i+1,size;
886: ISGetLocalSize(eedges[i],&size);
887: if (!size && nedfieldlocal) continue;
888: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
889: ISGetIndices(eedges[i],&idxs);
890: if (print) {
891: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i);
892: ISView(eedges[i],NULL);
893: }
894: for (j=0;j<size;j++) {
895: PetscInt k, ee = idxs[j];
896: if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee);
897: for (k=ii[ee];k<ii[ee+1];k++) {
898: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]);
899: if (PetscBTLookup(btv,jj[k])) {
900: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]);
901: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
902: PetscInt k2;
903: PetscBool corner = PETSC_FALSE;
904: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
905: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
906: /* it's a corner if either is connected with an edge dof belonging to a different cc or
907: if the edge dof lie on the natural part of the boundary */
908: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
909: corner = PETSC_TRUE;
910: break;
911: }
912: }
913: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
914: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]);
915: PetscBTSet(btv,jj[k]);
916: } else {
917: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
918: }
919: }
920: }
921: }
922: ISRestoreIndices(eedges[i],&idxs);
923: }
924: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
925: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
926: PetscBTDestroy(&btb);
928: /* Reset marked primal dofs */
929: ISGetLocalSize(primals,&cum);
930: ISGetIndices(primals,&idxs);
931: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
932: ISRestoreIndices(primals,&idxs);
934: /* Now use the initial lG */
935: MatDestroy(&lG);
936: MatDestroy(&lGt);
937: lG = lGinit;
938: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
940: /* Compute extended cols indices */
941: PetscBTCreate(nv,&btvc);
942: PetscBTCreate(nee,&bter);
943: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
944: MatSeqAIJGetMaxRowNonzeros(lG,&i);
945: i *= maxsize;
946: PetscCalloc1(nee,&extcols);
947: PetscMalloc2(i,&extrow,i,&gidxs);
948: eerr = PETSC_FALSE;
949: for (i=0;i<nee;i++) {
950: PetscInt size,found = 0;
952: cum = 0;
953: ISGetLocalSize(eedges[i],&size);
954: if (!size && nedfieldlocal) continue;
955: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
956: ISGetIndices(eedges[i],&idxs);
957: PetscBTMemzero(nv,btvc);
958: for (j=0;j<size;j++) {
959: PetscInt k,ee = idxs[j];
960: for (k=ii[ee];k<ii[ee+1];k++) {
961: PetscInt vv = jj[k];
962: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
963: else if (!PetscBTLookupSet(btvc,vv)) found++;
964: }
965: }
966: ISRestoreIndices(eedges[i],&idxs);
967: PetscSortRemoveDupsInt(&cum,extrow);
968: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
969: PetscSortIntWithArray(cum,gidxs,extrow);
970: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
971: /* it may happen that endpoints are not defined at this point
972: if it is the case, mark this edge for a second pass */
973: if (cum != size -1 || found != 2) {
974: PetscBTSet(bter,i);
975: if (print) {
976: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
977: ISView(eedges[i],NULL);
978: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
979: ISView(extcols[i],NULL);
980: }
981: eerr = PETSC_TRUE;
982: }
983: }
984: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
985: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
986: if (done) {
987: PetscInt *newprimals;
989: PetscMalloc1(ne,&newprimals);
990: ISGetLocalSize(primals,&cum);
991: ISGetIndices(primals,&idxs);
992: PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
993: ISRestoreIndices(primals,&idxs);
994: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
995: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr);
996: for (i=0;i<nee;i++) {
997: PetscBool has_candidates = PETSC_FALSE;
998: if (PetscBTLookup(bter,i)) {
999: PetscInt size,mark = i+1;
1001: ISGetLocalSize(eedges[i],&size);
1002: ISGetIndices(eedges[i],&idxs);
1003: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1004: for (j=0;j<size;j++) {
1005: PetscInt k,ee = idxs[j];
1006: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]);
1007: for (k=ii[ee];k<ii[ee+1];k++) {
1008: /* set all candidates located on the edge as corners */
1009: if (PetscBTLookup(btvcand,jj[k])) {
1010: PetscInt k2,vv = jj[k];
1011: has_candidates = PETSC_TRUE;
1012: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv);
1013: PetscBTSet(btv,vv);
1014: /* set all edge dofs connected to candidate as primals */
1015: for (k2=iit[vv];k2<iit[vv+1];k2++) {
1016: if (marks[jjt[k2]] == mark) {
1017: PetscInt k3,ee2 = jjt[k2];
1018: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2);
1019: newprimals[cum++] = ee2;
1020: /* finally set the new corners */
1021: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
1022: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]);
1023: PetscBTSet(btv,jj[k3]);
1024: }
1025: }
1026: }
1027: } else {
1028: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]);
1029: }
1030: }
1031: }
1032: if (!has_candidates) { /* circular edge */
1033: PetscInt k, ee = idxs[0],*tmarks;
1035: PetscCalloc1(ne,&tmarks);
1036: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i);
1037: for (k=ii[ee];k<ii[ee+1];k++) {
1038: PetscInt k2;
1039: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]);
1040: PetscBTSet(btv,jj[k]);
1041: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
1042: }
1043: for (j=0;j<size;j++) {
1044: if (tmarks[idxs[j]] > 1) {
1045: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]);
1046: newprimals[cum++] = idxs[j];
1047: }
1048: }
1049: PetscFree(tmarks);
1050: }
1051: ISRestoreIndices(eedges[i],&idxs);
1052: }
1053: ISDestroy(&extcols[i]);
1054: }
1055: PetscFree(extcols);
1056: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1057: PetscSortRemoveDupsInt(&cum,newprimals);
1058: if (fl2g) {
1059: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1060: ISDestroy(&primals);
1061: for (i=0;i<nee;i++) {
1062: ISDestroy(&eedges[i]);
1063: }
1064: PetscFree(eedges);
1065: }
1066: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1067: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1068: PetscFree(newprimals);
1069: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1070: ISDestroy(&primals);
1071: PCBDDCAnalyzeInterface(pc);
1072: pcbddc->mat_graph->twodim = PETSC_FALSE;
1073: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1074: if (fl2g) {
1075: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1076: PetscMalloc1(nee,&eedges);
1077: for (i=0;i<nee;i++) {
1078: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1079: }
1080: } else {
1081: eedges = alleedges;
1082: primals = allprimals;
1083: }
1084: PetscCalloc1(nee,&extcols);
1086: /* Mark again */
1087: PetscMemzero(marks,ne*sizeof(PetscInt));
1088: for (i=0;i<nee;i++) {
1089: PetscInt size,mark = i+1;
1091: ISGetLocalSize(eedges[i],&size);
1092: ISGetIndices(eedges[i],&idxs);
1093: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1094: ISRestoreIndices(eedges[i],&idxs);
1095: }
1096: if (print) {
1097: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1098: ISView(primals,NULL);
1099: }
1101: /* Recompute extended cols */
1102: eerr = PETSC_FALSE;
1103: for (i=0;i<nee;i++) {
1104: PetscInt size;
1106: cum = 0;
1107: ISGetLocalSize(eedges[i],&size);
1108: if (!size && nedfieldlocal) continue;
1109: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1110: ISGetIndices(eedges[i],&idxs);
1111: for (j=0;j<size;j++) {
1112: PetscInt k,ee = idxs[j];
1113: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1114: }
1115: ISRestoreIndices(eedges[i],&idxs);
1116: PetscSortRemoveDupsInt(&cum,extrow);
1117: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1118: PetscSortIntWithArray(cum,gidxs,extrow);
1119: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1120: if (cum != size -1) {
1121: if (print) {
1122: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1123: ISView(eedges[i],NULL);
1124: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1125: ISView(extcols[i],NULL);
1126: }
1127: eerr = PETSC_TRUE;
1128: }
1129: }
1130: }
1131: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1132: PetscFree2(extrow,gidxs);
1133: PetscBTDestroy(&bter);
1134: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1135: /* an error should not occur at this point */
1136: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1138: /* Check the number of endpoints */
1139: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140: PetscMalloc1(2*nee,&corners);
1141: PetscMalloc1(nee,&cedges);
1142: for (i=0;i<nee;i++) {
1143: PetscInt size, found = 0, gc[2];
1145: /* init with defaults */
1146: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1147: ISGetLocalSize(eedges[i],&size);
1148: if (!size && nedfieldlocal) continue;
1149: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i);
1150: ISGetIndices(eedges[i],&idxs);
1151: PetscBTMemzero(nv,btvc);
1152: for (j=0;j<size;j++) {
1153: PetscInt k,ee = idxs[j];
1154: for (k=ii[ee];k<ii[ee+1];k++) {
1155: PetscInt vv = jj[k];
1156: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1157: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i);
1158: corners[i*2+found++] = vv;
1159: }
1160: }
1161: }
1162: if (found != 2) {
1163: PetscInt e;
1164: if (fl2g) {
1165: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1166: } else {
1167: e = idxs[0];
1168: }
1169: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]);
1170: }
1172: /* get primal dof index on this coarse edge */
1173: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1174: if (gc[0] > gc[1]) {
1175: PetscInt swap = corners[2*i];
1176: corners[2*i] = corners[2*i+1];
1177: corners[2*i+1] = swap;
1178: }
1179: cedges[i] = idxs[size-1];
1180: ISRestoreIndices(eedges[i],&idxs);
1181: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1182: }
1183: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1184: PetscBTDestroy(&btvc);
1186: #if defined(PETSC_USE_DEBUG)
1187: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1188: not interfere with neighbouring coarse edges */
1189: PetscMalloc1(nee+1,&emarks);
1190: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1191: for (i=0;i<nv;i++) {
1192: PetscInt emax = 0,eemax = 0;
1194: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1195: PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1196: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1197: for (j=1;j<nee+1;j++) {
1198: if (emax < emarks[j]) {
1199: emax = emarks[j];
1200: eemax = j;
1201: }
1202: }
1203: /* not relevant for edges */
1204: if (!eemax) continue;
1206: for (j=ii[i];j<ii[i+1];j++) {
1207: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1208: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]);
1209: }
1210: }
1211: }
1212: PetscFree(emarks);
1213: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1214: #endif
1216: /* Compute extended rows indices for edge blocks of the change of basis */
1217: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1218: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1219: extmem *= maxsize;
1220: PetscMalloc1(extmem*nee,&extrow);
1221: PetscMalloc1(nee,&extrows);
1222: PetscCalloc1(nee,&extrowcum);
1223: for (i=0;i<nv;i++) {
1224: PetscInt mark = 0,size,start;
1226: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1227: for (j=ii[i];j<ii[i+1];j++)
1228: if (marks[jj[j]] && !mark)
1229: mark = marks[jj[j]];
1231: /* not relevant */
1232: if (!mark) continue;
1234: /* import extended row */
1235: mark--;
1236: start = mark*extmem+extrowcum[mark];
1237: size = ii[i+1]-ii[i];
1238: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem);
1239: PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1240: extrowcum[mark] += size;
1241: }
1242: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1243: MatDestroy(&lGt);
1244: PetscFree(marks);
1246: /* Compress extrows */
1247: cum = 0;
1248: for (i=0;i<nee;i++) {
1249: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1250: PetscSortRemoveDupsInt(&size,start);
1251: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1252: cum = PetscMax(cum,size);
1253: }
1254: PetscFree(extrowcum);
1255: PetscBTDestroy(&btv);
1256: PetscBTDestroy(&btvcand);
1258: /* Workspace for lapack inner calls and VecSetValues */
1259: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1261: /* Create change of basis matrix (preallocation can be improved) */
1262: MatCreate(comm,&T);
1263: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1264: pc->pmat->rmap->N,pc->pmat->rmap->N);
1265: MatSetType(T,MATAIJ);
1266: MatSeqAIJSetPreallocation(T,10,NULL);
1267: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1268: MatSetLocalToGlobalMapping(T,al2g,al2g);
1269: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1270: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1271: ISLocalToGlobalMappingDestroy(&al2g);
1273: /* Defaults to identity */
1274: MatCreateVecs(pc->pmat,&tvec,NULL);
1275: VecSet(tvec,1.0);
1276: MatDiagonalSet(T,tvec,INSERT_VALUES);
1277: VecDestroy(&tvec);
1279: /* Create discrete gradient for the coarser level if needed */
1280: MatDestroy(&pcbddc->nedcG);
1281: ISDestroy(&pcbddc->nedclocal);
1282: if (pcbddc->current_level < pcbddc->max_levels) {
1283: ISLocalToGlobalMapping cel2g,cvl2g;
1284: IS wis,gwis;
1285: PetscInt cnv,cne;
1287: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1288: if (fl2g) {
1289: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1290: } else {
1291: PetscObjectReference((PetscObject)wis);
1292: pcbddc->nedclocal = wis;
1293: }
1294: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1295: ISDestroy(&wis);
1296: ISRenumber(gwis,NULL,&cne,&wis);
1297: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1298: ISDestroy(&wis);
1299: ISDestroy(&gwis);
1301: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1302: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1303: ISDestroy(&wis);
1304: ISRenumber(gwis,NULL,&cnv,&wis);
1305: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1306: ISDestroy(&wis);
1307: ISDestroy(&gwis);
1309: MatCreate(comm,&pcbddc->nedcG);
1310: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1311: MatSetType(pcbddc->nedcG,MATAIJ);
1312: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1313: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1314: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1315: ISLocalToGlobalMappingDestroy(&cel2g);
1316: ISLocalToGlobalMappingDestroy(&cvl2g);
1317: }
1318: ISLocalToGlobalMappingDestroy(&vl2g);
1320: #if defined(PRINT_GDET)
1321: inc = 0;
1322: lev = pcbddc->current_level;
1323: #endif
1325: /* Insert values in the change of basis matrix */
1326: for (i=0;i<nee;i++) {
1327: Mat Gins = NULL, GKins = NULL;
1328: IS cornersis = NULL;
1329: PetscScalar cvals[2];
1331: if (pcbddc->nedcG) {
1332: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1333: }
1334: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1335: if (Gins && GKins) {
1336: PetscScalar *data;
1337: const PetscInt *rows,*cols;
1338: PetscInt nrh,nch,nrc,ncc;
1340: ISGetIndices(eedges[i],&cols);
1341: /* H1 */
1342: ISGetIndices(extrows[i],&rows);
1343: MatGetSize(Gins,&nrh,&nch);
1344: MatDenseGetArray(Gins,&data);
1345: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1346: MatDenseRestoreArray(Gins,&data);
1347: ISRestoreIndices(extrows[i],&rows);
1348: /* complement */
1349: MatGetSize(GKins,&nrc,&ncc);
1350: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i);
1351: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i);
1352: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc);
1353: MatDenseGetArray(GKins,&data);
1354: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1355: MatDenseRestoreArray(GKins,&data);
1357: /* coarse discrete gradient */
1358: if (pcbddc->nedcG) {
1359: PetscInt cols[2];
1361: cols[0] = 2*i;
1362: cols[1] = 2*i+1;
1363: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1364: }
1365: ISRestoreIndices(eedges[i],&cols);
1366: }
1367: ISDestroy(&extrows[i]);
1368: ISDestroy(&extcols[i]);
1369: ISDestroy(&cornersis);
1370: MatDestroy(&Gins);
1371: MatDestroy(&GKins);
1372: }
1373: ISLocalToGlobalMappingDestroy(&el2g);
1375: /* Start assembling */
1376: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1377: if (pcbddc->nedcG) {
1378: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1379: }
1381: /* Free */
1382: if (fl2g) {
1383: ISDestroy(&primals);
1384: for (i=0;i<nee;i++) {
1385: ISDestroy(&eedges[i]);
1386: }
1387: PetscFree(eedges);
1388: }
1390: /* hack mat_graph with primal dofs on the coarse edges */
1391: {
1392: PCBDDCGraph graph = pcbddc->mat_graph;
1393: PetscInt *oqueue = graph->queue;
1394: PetscInt *ocptr = graph->cptr;
1395: PetscInt ncc,*idxs;
1397: /* find first primal edge */
1398: if (pcbddc->nedclocal) {
1399: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1400: } else {
1401: if (fl2g) {
1402: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1403: }
1404: idxs = cedges;
1405: }
1406: cum = 0;
1407: while (cum < nee && cedges[cum] < 0) cum++;
1409: /* adapt connected components */
1410: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1411: graph->cptr[0] = 0;
1412: for (i=0,ncc=0;i<graph->ncc;i++) {
1413: PetscInt lc = ocptr[i+1]-ocptr[i];
1414: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1415: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1416: graph->queue[graph->cptr[ncc]] = cedges[cum];
1417: ncc++;
1418: lc--;
1419: cum++;
1420: while (cum < nee && cedges[cum] < 0) cum++;
1421: }
1422: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1423: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1424: ncc++;
1425: }
1426: graph->ncc = ncc;
1427: if (pcbddc->nedclocal) {
1428: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1429: }
1430: PetscFree2(ocptr,oqueue);
1431: }
1432: ISLocalToGlobalMappingDestroy(&fl2g);
1433: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1434: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1435: MatDestroy(&conn);
1437: ISDestroy(&nedfieldlocal);
1438: PetscFree(extrow);
1439: PetscFree2(work,rwork);
1440: PetscFree(corners);
1441: PetscFree(cedges);
1442: PetscFree(extrows);
1443: PetscFree(extcols);
1444: MatDestroy(&lG);
1446: /* Complete assembling */
1447: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1448: if (pcbddc->nedcG) {
1449: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1450: #if 0
1451: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1452: MatView(pcbddc->nedcG,NULL);
1453: #endif
1454: }
1456: /* set change of basis */
1457: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1458: MatDestroy(&T);
1460: return(0);
1461: }
1463: /* the near-null space of BDDC carries information on quadrature weights,
1464: and these can be collinear -> so cheat with MatNullSpaceCreate
1465: and create a suitable set of basis vectors first */
/* PCBDDCNullSpaceCreate - wrap caller-supplied vectors in a MatNullSpace.

   Before calling MatNullSpaceCreate a few placeholder entries are written
   into each quad_vecs[i]; after the call the very same entries are set back
   to zero.

   comm      - communicator forwarded to MatNullSpaceCreate
   has_const - forwarded to MatNullSpaceCreate; also selects which placeholder
               entries are written (one entry, or a +/- pair)
   nvecs     - number of vectors in quad_vecs
   quad_vecs - vectors handed to MatNullSpaceCreate; modified temporarily
   nnsp      - output, filled by MatNullSpaceCreate

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is set and the
   local size of a vector is smaller than 2*nvecs.
   NOTE(review): the placeholders are orthonormal only if the vectors are
   otherwise zero on entry - confirm against callers. */
1466: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1467: {
1469: PetscInt i;
/* first pass: write the placeholder entries */
1472: for (i=0;i<nvecs;i++) {
1473: PetscInt first,last;
1475: VecGetOwnershipRange(quad_vecs[i],&first,&last);
/* the +/- pair below needs two local slots per vector */
1476: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
/* only the process owning global index i touches vector i */
1477: if (i>=first && i < last) {
1478: PetscScalar *data;
1479: VecGetArray(quad_vecs[i],&data);
1480: if (!has_const) {
/* single unit entry at global index i */
1481: data[i-first] = 1.;
1482: } else {
/* entries +1/sqrt(2) and -1/sqrt(2) at global indices 2i and 2i+1: they sum to zero */
1483: data[2*i-first] = 1./PetscSqrtReal(2.);
1484: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1485: }
1486: VecRestoreArray(quad_vecs[i],&data);
1487: }
1488: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1489: }
1490: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
/* second pass: undo the placeholder entries written above */
1491: for (i=0;i<nvecs;i++) { /* reset vectors */
1492: PetscInt first,last;
/* presumably MatNullSpaceCreate left the vectors locked; pop the lock before writing - TODO confirm */
1493: VecLockPop(quad_vecs[i]);
1494: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1495: if (i>=first && i < last) {
1496: PetscScalar *data;
1497: VecGetArray(quad_vecs[i],&data);
1498: if (!has_const) {
1499: data[i-first] = 0.;
1500: } else {
1501: data[2*i-first] = 0.;
1502: data[2*i-first+1] = 0.;
1503: }
1504: VecRestoreArray(quad_vecs[i],&data);
1505: }
1506: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
/* push the lock again so the vectors leave this routine locked */
1507: VecLockPush(quad_vecs[i]);
1508: }
1509: return(0);
1510: }
/* PCBDDCComputeNoNetFlux - build a MatNullSpace (returned in nnsp) whose
   vectors hold, on the dofs shared with neighbouring processes, the entries
   of the local part of divudotp applied to a vector of ones.

   A         - supplies the communicator, the vector layout (MatCreateVecs) and
               the local-to-global mapping used for insertion; when vl2l is
               given, its MATIS local matrix sizes the scatter target
   divudotp  - matrix whose MATIS local part is multiplied with ones
   transpose - PETSC_FALSE: use the first mapping returned for A and
               MatMultTranspose (column sums of the local part);
               PETSC_TRUE: use the second mapping and MatMult (row sums)
   vl2l      - optional IS; when non-NULL it selects entries of the local
               result that are scattered into a vector of A's local matrix
   graph     - supplies l2gmap (neighbour / shared-dof info), count and
               neighbours_set
   nnsp      - output; set to NULL when no process reports any neighbour */
1512: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1513: {
1514: Mat loc_divudotp;
1515: Vec p,v,vins,quad_vec,*quad_vecs;
1516: ISLocalToGlobalMapping map;
1517: PetscScalar *vals;
1518: const PetscScalar *array;
1519: PetscInt i,maxneighs,maxsize;
1520: PetscInt n_neigh,*neigh,*n_shared,**shared;
1521: PetscMPIInt rank;
1522: PetscErrorCode ierr;
/* the number of vectors is the largest neighbour count over the communicator of A */
1525: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1526: MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
/* nobody has neighbours: nothing to build */
1527: if (!maxneighs) {
1528: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1529: *nnsp = NULL;
1530: return(0);
1531: }
/* vals is a scratch buffer large enough for the biggest shared-dof list */
1532: maxsize = 0;
1533: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1534: PetscMalloc1(maxsize,&vals);
1535: /* create vectors to hold quadrature weights */
1536: MatCreateVecs(A,&quad_vec,NULL);
1537: if (!transpose) {
1538: MatGetLocalToGlobalMapping(A,&map,NULL);
1539: } else {
1540: MatGetLocalToGlobalMapping(A,NULL,&map);
1541: }
1542: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1543: VecDestroy(&quad_vec);
/* the null space object is created first, around still-empty vectors; they are filled below */
1544: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
/* pop the lock on every vector (pushed again after assembly) and attach the mapping for local insertion */
1545: for (i=0;i<maxneighs;i++) {
1546: VecLockPop(quad_vecs[i]);
1547: VecSetLocalToGlobalMapping(quad_vecs[i],map);
1548: }
1550: /* compute local quad vec */
1551: MatISGetLocalMat(divudotp,&loc_divudotp);
1552: if (!transpose) {
1553: MatCreateVecs(loc_divudotp,&v,&p);
1554: } else {
1555: MatCreateVecs(loc_divudotp,&p,&v);
1556: }
/* v = (local divudotp)^T * ones, or (local divudotp) * ones when transpose is set */
1557: VecSet(p,1.);
1558: if (!transpose) {
1559: MatMultTranspose(loc_divudotp,p,v);
1560: } else {
1561: MatMult(loc_divudotp,p,v);
1562: }
/* optionally move the result into a vector shaped like A's local matrix; vl2l indexes the source v */
1563: if (vl2l) {
1564: Mat lA;
1565: VecScatter sc;
1567: MatISGetLocalMat(A,&lA);
1568: MatCreateVecs(lA,&vins,NULL);
1569: VecScatterCreate(v,vl2l,vins,NULL,&sc);
1570: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1571: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1572: VecScatterDestroy(&sc);
1573: } else {
1574: vins = v;
1575: }
1576: VecGetArrayRead(vins,&array);
1577: VecDestroy(&p);
1579: /* insert in global quadrature vecs */
1580: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1581: for (i=0;i<n_neigh;i++) {
1582: const PetscInt *idxs;
1583: PetscInt idx,nn,j;
1585: idxs = shared[i];
1586: nn = n_shared[i];
/* gather the local values living on the dofs shared with neighbour i */
1587: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
/* pick the target vector from the position this rank would take in the neighbour set of
   the first shared dof; the unconditional negation assumes rank is NOT stored in
   neighbours_set, so that PetscFindInt reports -(slot+1) - TODO confirm */
1588: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1589: idx = -(idx+1);
1590: VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1591: }
1592: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1593: VecRestoreArrayRead(vins,&array);
/* vins is a separate vector only when a scatter was used; otherwise it aliases v */
1594: if (vl2l) {
1595: VecDestroy(&vins);
1596: }
1597: VecDestroy(&v);
1598: PetscFree(vals);
1600: /* assemble near null space */
1601: for (i=0;i<maxneighs;i++) {
1602: VecAssemblyBegin(quad_vecs[i]);
1603: }
1604: for (i=0;i<maxneighs;i++) {
1605: VecAssemblyEnd(quad_vecs[i]);
1606: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
/* push the lock back, matching the VecLockPop done after the null space was created */
1607: VecLockPush(quad_vecs[i]);
1608: }
/* drop the local references; presumably *nnsp keeps its own references to the vectors - TODO confirm */
1609: VecDestroyVecs(maxneighs,&quad_vecs);
1610: return(0);
1611: }
/* PCBDDCAddPrimalVerticesLocalIS - add the indices in primalv to the
   user-defined local primal vertices stored in the PC.

   pc      - preconditioner whose data is a PC_BDDC
   primalv - indices to add; a NULL value makes the call a no-op

   If pcbddc->user_primal_vertices_local already exists it is replaced by the
   sorted, duplicate-free concatenation of primalv and the existing set;
   otherwise primalv is handed to PCBDDCSetPrimalVerticesLocalIS. */
1613: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1614: {
1615: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1619: if (primalv) {
1620: if (pcbddc->user_primal_vertices_local) {
1621: IS list[2], newp;
1623: list[0] = primalv;
1624: list[1] = pcbddc->user_primal_vertices_local;
1625: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1626: ISSortRemoveDups(newp);
/* release the previous set through the local alias, then store the merged one in its place */
1627: ISDestroy(&list[1]);
1628: pcbddc->user_primal_vertices_local = newp;
1629: } else {
1630: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1631: }
1632: }
1633: return(0);
1634: }
/* func_coords_private - callback that returns one coordinate of the point X.

   dim, t - unused here
   X      - coordinates of the evaluation point
   Nf     - number of output entries to fill
   out    - output; every one of the Nf entries receives X[*comp]
   ctx    - points to a PetscInt holding the coordinate component to extract */
1636: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1637: {
1638: PetscInt f, *comp = (PetscInt *)ctx;
1641: for (f=0;f<Nf;f++) out[f] = X[*comp];
1642: return(0);
1643: }
/* PCBDDCComputeLocalTopologyInfo - bring the topology information stored in
   the PC (dof splitting, Dirichlet/Neumann boundaries, user primal vertices)
   into local ordering, and optionally derive extra information from an
   attached DM.

   pc - preconditioner; pc->data is used as PC_BDDC and pc->pmat->data as Mat_IS

   Steps, in order:
     1. read the -pc_bddc_monolithic option (skips all dof-splitting handling);
     2. obtain the local dof splitting from user-provided global ISs, from a DM
        field decomposition, from a "_convert_nest_lfields" container attached
        to pmat, or from the block size of pmat;
     3. convert or consistency-check the boundary and primal-vertex index sets;
     4. if requested, detect disconnected local components;
     5. if pmat's DM is a DMDA, register the subdomain corners as primal vertices;
     6. if corner selection is on and the graph holds no coordinates yet,
        compute dof coordinates through the DM and pass them to PCSetCoordinates. */
1645: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1646: {
1648: Vec local,global;
1649: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1650: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1651: PetscBool monolithic = PETSC_FALSE;
1654: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1655: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1656: PetscOptionsEnd();
1657: /* need to convert from global to local topology information and remove references to information in global ordering */
/* work vectors handed to PCBDDCGlobalToLocal for every conversion below */
1658: MatCreateVecs(pc->pmat,&global,NULL);
1659: MatCreateVecs(matis->A,&local,NULL);
1660: if (monolithic) { /* just get block size to properly compute vertices */
1661: if (pcbddc->vertex_size == 1) {
1662: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1663: }
1664: goto boundary;
1665: }
1667: if (pcbddc->user_provided_isfordofs) {
/* user gave the splitting in global ordering: convert each IS and drop the global copies */
1668: if (pcbddc->n_ISForDofs) {
1669: PetscInt i;
1670: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1671: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1672: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1673: ISDestroy(&pcbddc->ISForDofs[i]);
1674: }
1675: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1676: pcbddc->n_ISForDofs = 0;
1677: PetscFree(pcbddc->ISForDofs);
1678: }
1679: } else {
1680: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1681: DM dm;
/* look for a DM on the PC first, then on the preconditioning matrix */
1683: PCGetDM(pc, &dm);
1684: if (!dm) {
1685: MatGetDM(pc->pmat, &dm);
1686: }
1687: if (dm) {
1688: IS *fields;
1689: PetscInt nf,i;
/* one local IS per field of the DM decomposition */
1690: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1691: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1692: for (i=0;i<nf;i++) {
1693: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1694: ISDestroy(&fields[i]);
1695: }
1696: PetscFree(fields);
1697: pcbddc->n_ISForDofsLocal = nf;
1698: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1699: PetscContainer c;
1701: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1702: if (c) {
1703: MatISLocalFields lf;
1704: PetscContainerGetPointer(c,(void**)&lf);
1705: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1706: } else { /* fallback, create the default fields if bs > 1 */
1707: PetscInt i, n = matis->A->rmap->n;
1708: MatGetBlockSize(pc->pmat,&i);
1709: if (i > 1) {
/* field i takes local indices i, i+bs, i+2*bs, ... (n/bs entries each) */
1710: pcbddc->n_ISForDofsLocal = i;
1711: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1712: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1713: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1714: }
1715: }
1716: }
1717: }
1718: } else {
/* a local splitting is already there: run the consistency check (MPI_LAND) on each field */
1719: PetscInt i;
1720: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1721: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1722: }
1723: }
1724: }
1726: boundary:
/* Dirichlet set: convert the global IS if no local one exists, else check the local one with MPI_LAND */
1727: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1728: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1729: } else if (pcbddc->DirichletBoundariesLocal) {
1730: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1731: }
/* Neumann set: same logic, but the check combines with MPI_LOR */
1732: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1733: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1734: } else if (pcbddc->NeumannBoundariesLocal) {
1735: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1736: }
1737: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1738: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1739: }
1740: VecDestroy(&global);
1741: VecDestroy(&local);
1742: /* detect local disconnected subdomains if requested (use matis->A) */
1743: if (pcbddc->detect_disconnected) {
1744: IS primalv = NULL;
1745: PetscInt i;
/* discard the result of any previous detection before recomputing */
1747: for (i=0;i<pcbddc->n_local_subs;i++) {
1748: ISDestroy(&pcbddc->local_subs[i]);
1749: }
1750: PetscFree(pcbddc->local_subs);
1751: PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1752: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1753: ISDestroy(&primalv);
1754: }
1755: /* early stage corner detection */
1756: {
1757: DM dm;
1759: MatGetDM(pc->pmat,&dm);
1760: if (dm) {
1761: PetscBool isda;
1763: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1764: if (isda) {
1765: ISLocalToGlobalMapping l2l;
1766: IS corners;
1767: Mat lA;
1769: DMDAGetSubdomainCornersIS(dm,&corners);
1770: MatISGetLocalMat(pc->pmat,&lA);
1771: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1772: MatISRestoreLocalMat(pc->pmat,&lA);
1773: if (l2l) {
1774: const PetscInt *idx;
1775: PetscInt bs,*idxout,n;
/* map the corner indices through the mapping attached to the local matrix (block-wise),
   then register them as primal vertices; idxout is owned by the new block IS */
1777: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1778: ISGetLocalSize(corners,&n);
1779: ISGetIndices(corners,&idx);
1780: PetscMalloc1(n,&idxout);
1781: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1782: ISRestoreIndices(corners,&idx);
1783: DMDARestoreSubdomainCornersIS(dm,&corners);
1784: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1785: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1786: ISDestroy(&corners);
1787: pcbddc->corner_selected = PETSC_TRUE;
1788: } else { /* not from DMDA */
1789: DMDARestoreSubdomainCornersIS(dm,&corners);
1790: }
1791: }
1792: }
1793: }
/* coordinates are computed only when corner selection is on and the graph has none yet (cdim == 0) */
1794: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1795: DM dm;
1797: PCGetDM(pc,&dm);
1798: if (!dm) {
1799: MatGetDM(pc->pmat,&dm);
1800: }
1801: if (dm) {
1802: Vec vcoords;
1803: PetscSection section;
1804: PetscReal *coords;
1805: PetscInt d,cdim,nl,nf,**ctxs;
1806: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1808: DMGetCoordinateDim(dm,&cdim);
/* section is an output argument here; it is read right below to get the number of fields */
1809: DMGetDefaultSection(dm,&section);
1810: PetscSectionGetNumFields(section,&nf);
1811: DMCreateGlobalVector(dm,&vcoords);
1812: VecGetLocalSize(vcoords,&nl);
1813: PetscMalloc1(nl*cdim,&coords);
/* one callback and one context pointer per field; the contexts are nf consecutive
   PetscInt slots allocated once and addressed through ctxs[d] */
1814: PetscMalloc2(nf,&funcs,nf,&ctxs);
1815: PetscMalloc1(nf,&ctxs[0]);
1816: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1817: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
/* project coordinate component d for all fields, then store it interleaved: coords[i*cdim+d] */
1818: for (d=0;d<cdim;d++) {
1819: PetscInt i;
1820: const PetscScalar *v;
1822: for (i=0;i<nf;i++) ctxs[i][0] = d;
1823: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1824: VecGetArrayRead(vcoords,&v);
1825: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1826: VecRestoreArrayRead(vcoords,&v);
1827: }
1828: VecDestroy(&vcoords);
1829: PCSetCoordinates(pc,cdim,nl,coords);
1830: PetscFree(coords);
1831: PetscFree(ctxs[0]);
1832: PetscFree2(funcs,ctxs);
1833: }
1834: }
1835: return(0);
1836: }
/* PCBDDCConsistencyCheckIS - replace a local index set by the one obtained
   after combining its membership flags through the star forest of the MATIS
   preconditioning matrix.

   pc  - preconditioner; pc->pmat->data is used as Mat_IS
   mop - MPI_LAND or MPI_LOR; any other value raises PETSC_ERR_SUP
   is  - in/out; the incoming IS is destroyed and *is is set to a new IS,
         created on the same communicator, listing the local indices whose
         flag is true after the reduce + broadcast round trip

   Indices of the incoming IS outside [0,n), with n the local row size of
   matis->A, are ignored. */
1838: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1839: {
1840: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1841: PetscErrorCode ierr;
1842: IS nis;
1843: const PetscInt *idxs;
1844: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1845: PetscBool *ld;
1848: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1849: MatISSetUpSF(pc->pmat);
/* root flags start from the neutral element of the reduction: true for AND, false (zero) for OR */
1850: if (mop == MPI_LAND) {
1851: /* init rootdata with true */
1852: ld = (PetscBool*) matis->sf_rootdata;
1853: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1854: } else {
1855: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1856: }
/* leaf flags: true exactly on the valid local indices listed in *is */
1857: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1858: ISGetLocalSize(*is,&nd);
1859: ISGetIndices(*is,&idxs);
1860: ld = (PetscBool*) matis->sf_leafdata;
1861: for (i=0;i<nd;i++)
1862: if (-1 < idxs[i] && idxs[i] < n)
1863: ld[idxs[i]] = PETSC_TRUE;
1864: ISRestoreIndices(*is,&idxs);
/* combine leaf flags into the roots with mop, then send the combined flags back to the leaves */
1865: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1866: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1867: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1868: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
/* output buffer: nd entries for AND, n entries for OR
   (presumably AND can only keep indices that were already listed, OR may add any local index - TODO confirm) */
1869: if (mop == MPI_LAND) {
1870: PetscMalloc1(nd,&nidxs);
1871: } else {
1872: PetscMalloc1(n,&nidxs);
1873: }
/* ld still aliases sf_leafdata, which now holds the broadcast result */
1874: for (i=0,nnd=0;i<n;i++)
1875: if (ld[i])
1876: nidxs[nnd++] = i;
/* the new IS takes ownership of nidxs */
1877: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1878: ISDestroy(is);
1879: *is = nis;
1880: return(0);
1881: }
1883: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1884: {
1885: PC_IS *pcis = (PC_IS*)(pc->data);
1886: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1887: PetscErrorCode ierr;
1890: if (!pcbddc->benign_have_null) {
1891: return(0);
1892: }
1893: if (pcbddc->ChangeOfBasisMatrix) {
1894: Vec swap;
1896: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1897: swap = pcbddc->work_change;
1898: pcbddc->work_change = r;
1899: r = swap;
1900: }
1901: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1902: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1903: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1904: VecSet(z,0.);
1905: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1906: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1907: if (pcbddc->ChangeOfBasisMatrix) {
1908: pcbddc->work_change = r;
1909: VecCopy(z,pcbddc->work_change);
1910: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1911: }
1912: return(0);
1913: }
1915: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1916: {
1917: PCBDDCBenignMatMult_ctx ctx;
1918: PetscErrorCode ierr;
1919: PetscBool apply_right,apply_left,reset_x;
1922: MatShellGetContext(A,&ctx);
1923: if (transpose) {
1924: apply_right = ctx->apply_left;
1925: apply_left = ctx->apply_right;
1926: } else {
1927: apply_right = ctx->apply_right;
1928: apply_left = ctx->apply_left;
1929: }
1930: reset_x = PETSC_FALSE;
1931: if (apply_right) {
1932: const PetscScalar *ax;
1933: PetscInt nl,i;
1935: VecGetLocalSize(x,&nl);
1936: VecGetArrayRead(x,&ax);
1937: PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1938: VecRestoreArrayRead(x,&ax);
1939: for (i=0;i<ctx->benign_n;i++) {
1940: PetscScalar sum,val;
1941: const PetscInt *idxs;
1942: PetscInt nz,j;
1943: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1944: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1945: sum = 0.;
1946: if (ctx->apply_p0) {
1947: val = ctx->work[idxs[nz-1]];
1948: for (j=0;j<nz-1;j++) {
1949: sum += ctx->work[idxs[j]];
1950: ctx->work[idxs[j]] += val;
1951: }
1952: } else {
1953: for (j=0;j<nz-1;j++) {
1954: sum += ctx->work[idxs[j]];
1955: }
1956: }
1957: ctx->work[idxs[nz-1]] -= sum;
1958: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1959: }
1960: VecPlaceArray(x,ctx->work);
1961: reset_x = PETSC_TRUE;
1962: }
1963: if (transpose) {
1964: MatMultTranspose(ctx->A,x,y);
1965: } else {
1966: MatMult(ctx->A,x,y);
1967: }
1968: if (reset_x) {
1969: VecResetArray(x);
1970: }
1971: if (apply_left) {
1972: PetscScalar *ay;
1973: PetscInt i;
1975: VecGetArray(y,&ay);
1976: for (i=0;i<ctx->benign_n;i++) {
1977: PetscScalar sum,val;
1978: const PetscInt *idxs;
1979: PetscInt nz,j;
1980: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1981: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1982: val = -ay[idxs[nz-1]];
1983: if (ctx->apply_p0) {
1984: sum = 0.;
1985: for (j=0;j<nz-1;j++) {
1986: sum += ay[idxs[j]];
1987: ay[idxs[j]] += val;
1988: }
1989: ay[idxs[nz-1]] += sum;
1990: } else {
1991: for (j=0;j<nz-1;j++) {
1992: ay[idxs[j]] += val;
1993: }
1994: ay[idxs[nz-1]] = 0.;
1995: }
1996: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1997: }
1998: VecRestoreArray(y,&ay);
1999: }
2000: return(0);
2001: }
2003: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2004: {
2008: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2009: return(0);
2010: }
2012: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2013: {
2017: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2018: return(0);
2019: }
2021: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2022: {
2023: PC_IS *pcis = (PC_IS*)pc->data;
2024: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2025: PCBDDCBenignMatMult_ctx ctx;
2026: PetscErrorCode ierr;
2029: if (!restore) {
2030: Mat A_IB,A_BI;
2031: PetscScalar *work;
2032: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2034: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2035: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2036: PetscMalloc1(pcis->n,&work);
2037: MatCreate(PETSC_COMM_SELF,&A_IB);
2038: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2039: MatSetType(A_IB,MATSHELL);
2040: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2041: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2042: PetscNew(&ctx);
2043: MatShellSetContext(A_IB,ctx);
2044: ctx->apply_left = PETSC_TRUE;
2045: ctx->apply_right = PETSC_FALSE;
2046: ctx->apply_p0 = PETSC_FALSE;
2047: ctx->benign_n = pcbddc->benign_n;
2048: if (reuse) {
2049: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2050: ctx->free = PETSC_FALSE;
2051: } else { /* TODO: could be optimized for successive solves */
2052: ISLocalToGlobalMapping N_to_D;
2053: PetscInt i;
2055: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2056: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2057: for (i=0;i<pcbddc->benign_n;i++) {
2058: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2059: }
2060: ISLocalToGlobalMappingDestroy(&N_to_D);
2061: ctx->free = PETSC_TRUE;
2062: }
2063: ctx->A = pcis->A_IB;
2064: ctx->work = work;
2065: MatSetUp(A_IB);
2066: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2067: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2068: pcis->A_IB = A_IB;
2070: /* A_BI as A_IB^T */
2071: MatCreateTranspose(A_IB,&A_BI);
2072: pcbddc->benign_original_mat = pcis->A_BI;
2073: pcis->A_BI = A_BI;
2074: } else {
2075: if (!pcbddc->benign_original_mat) {
2076: return(0);
2077: }
2078: MatShellGetContext(pcis->A_IB,&ctx);
2079: MatDestroy(&pcis->A_IB);
2080: pcis->A_IB = ctx->A;
2081: ctx->A = NULL;
2082: MatDestroy(&pcis->A_BI);
2083: pcis->A_BI = pcbddc->benign_original_mat;
2084: pcbddc->benign_original_mat = NULL;
2085: if (ctx->free) {
2086: PetscInt i;
2087: for (i=0;i<ctx->benign_n;i++) {
2088: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2089: }
2090: PetscFree(ctx->benign_zerodiag_subs);
2091: }
2092: PetscFree(ctx->work);
2093: PetscFree(ctx);
2094: }
2095: return(0);
2096: }
2098: /* used just in bddc debug mode */
2099: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2100: {
2101: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2102: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2103: Mat An;
2107: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2108: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2109: if (is1) {
2110: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2111: MatDestroy(&An);
2112: } else {
2113: *B = An;
2114: }
2115: return(0);
2116: }
2118: /* TODO: add reuse flag */
2119: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2120: {
2121: Mat Bt;
2122: PetscScalar *a,*bdata;
2123: const PetscInt *ii,*ij;
2124: PetscInt m,n,i,nnz,*bii,*bij;
2125: PetscBool flg_row;
2129: MatGetSize(A,&n,&m);
2130: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2131: MatSeqAIJGetArray(A,&a);
2132: nnz = n;
2133: for (i=0;i<ii[n];i++) {
2134: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2135: }
2136: PetscMalloc1(n+1,&bii);
2137: PetscMalloc1(nnz,&bij);
2138: PetscMalloc1(nnz,&bdata);
2139: nnz = 0;
2140: bii[0] = 0;
2141: for (i=0;i<n;i++) {
2142: PetscInt j;
2143: for (j=ii[i];j<ii[i+1];j++) {
2144: PetscScalar entry = a[j];
2145: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2146: bij[nnz] = ij[j];
2147: bdata[nnz] = entry;
2148: nnz++;
2149: }
2150: }
2151: bii[i+1] = nnz;
2152: }
2153: MatSeqAIJRestoreArray(A,&a);
2154: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2155: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2156: {
2157: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2158: b->free_a = PETSC_TRUE;
2159: b->free_ij = PETSC_TRUE;
2160: }
2161: if (*B == A) {
2162: MatDestroy(&A);
2163: }
2164: *B = Bt;
2165: return(0);
2166: }
2168: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv)
2169: {
2170: Mat B = NULL;
2171: DM dm;
2172: IS is_dummy,*cc_n;
2173: ISLocalToGlobalMapping l2gmap_dummy;
2174: PCBDDCGraph graph;
2175: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2176: PetscInt i,n;
2177: PetscInt *xadj,*adjncy;
2178: PetscBool isplex = PETSC_FALSE;
2179: PetscErrorCode ierr;
2182: if (ncc) *ncc = 0;
2183: if (cc) *cc = NULL;
2184: if (primalv) *primalv = NULL;
2185: PCBDDCGraphCreate(&graph);
2186: PCGetDM(pc,&dm);
2187: if (!dm) {
2188: MatGetDM(pc->pmat,&dm);
2189: }
2190: if (dm) {
2191: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2192: }
2193: if (isplex) { /* this code has been modified from plexpartition.c */
2194: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2195: PetscInt *adj = NULL;
2196: IS cellNumbering;
2197: const PetscInt *cellNum;
2198: PetscBool useCone, useClosure;
2199: PetscSection section;
2200: PetscSegBuffer adjBuffer;
2201: PetscSF sfPoint;
2205: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2206: DMGetPointSF(dm, &sfPoint);
2207: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2208: /* Build adjacency graph via a section/segbuffer */
2209: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2210: PetscSectionSetChart(section, pStart, pEnd);
2211: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2212: /* Always use FVM adjacency to create partitioner graph */
2213: DMPlexGetAdjacencyUseCone(dm, &useCone);
2214: DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2215: DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2216: DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2217: DMPlexGetCellNumbering(dm, &cellNumbering);
2218: ISGetIndices(cellNumbering, &cellNum);
2219: for (n = 0, p = pStart; p < pEnd; p++) {
2220: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2221: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2222: adjSize = PETSC_DETERMINE;
2223: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2224: for (a = 0; a < adjSize; ++a) {
2225: const PetscInt point = adj[a];
2226: if (pStart <= point && point < pEnd) {
2227: PetscInt *PETSC_RESTRICT pBuf;
2228: PetscSectionAddDof(section, p, 1);
2229: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2230: *pBuf = point;
2231: }
2232: }
2233: n++;
2234: }
2235: DMPlexSetAdjacencyUseCone(dm, useCone);
2236: DMPlexSetAdjacencyUseClosure(dm, useClosure);
2237: /* Derive CSR graph from section/segbuffer */
2238: PetscSectionSetUp(section);
2239: PetscSectionGetStorageSize(section, &size);
2240: PetscMalloc1(n+1, &xadj);
2241: for (idx = 0, p = pStart; p < pEnd; p++) {
2242: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2243: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2244: }
2245: xadj[n] = size;
2246: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2247: /* Clean up */
2248: PetscSegBufferDestroy(&adjBuffer);
2249: PetscSectionDestroy(§ion);
2250: PetscFree(adj);
2251: graph->xadj = xadj;
2252: graph->adjncy = adjncy;
2253: } else {
2254: Mat A;
2255: PetscBool filter = PETSC_FALSE, isseqaij, flg_row;
2257: MatISGetLocalMat(pc->pmat,&A);
2258: if (!A->rmap->N || !A->cmap->N) {
2259: PCBDDCGraphDestroy(&graph);
2260: return(0);
2261: }
2262: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2263: if (!isseqaij && filter) {
2264: PetscBool isseqdense;
2266: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2267: if (!isseqdense) {
2268: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2269: } else { /* TODO: rectangular case and LDA */
2270: PetscScalar *array;
2271: PetscReal chop=1.e-6;
2273: MatDuplicate(A,MAT_COPY_VALUES,&B);
2274: MatDenseGetArray(B,&array);
2275: MatGetSize(B,&n,NULL);
2276: for (i=0;i<n;i++) {
2277: PetscInt j;
2278: for (j=i+1;j<n;j++) {
2279: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2280: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2281: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2282: }
2283: }
2284: MatDenseRestoreArray(B,&array);
2285: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2286: }
2287: } else {
2288: PetscObjectReference((PetscObject)A);
2289: B = A;
2290: }
2291: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2293: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2294: if (filter) {
2295: PetscScalar *data;
2296: PetscInt j,cum;
2298: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2299: MatSeqAIJGetArray(B,&data);
2300: cum = 0;
2301: for (i=0;i<n;i++) {
2302: PetscInt t;
2304: for (j=xadj[i];j<xadj[i+1];j++) {
2305: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2306: continue;
2307: }
2308: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2309: }
2310: t = xadj_filtered[i];
2311: xadj_filtered[i] = cum;
2312: cum += t;
2313: }
2314: MatSeqAIJRestoreArray(B,&data);
2315: graph->xadj = xadj_filtered;
2316: graph->adjncy = adjncy_filtered;
2317: } else {
2318: graph->xadj = xadj;
2319: graph->adjncy = adjncy;
2320: }
2321: }
2322: /* compute local connected components using PCBDDCGraph */
2323: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2324: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2325: ISDestroy(&is_dummy);
2326: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2327: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2328: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2329: PCBDDCGraphComputeConnectedComponents(graph);
2331: /* partial clean up */
2332: PetscFree2(xadj_filtered,adjncy_filtered);
2333: if (B) {
2334: PetscBool flg_row;
2335: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2336: MatDestroy(&B);
2337: }
2338: if (isplex) {
2339: PetscFree(xadj);
2340: PetscFree(adjncy);
2341: }
2343: /* get back data */
2344: if (isplex) {
2345: if (ncc) *ncc = graph->ncc;
2346: if (cc || primalv) {
2347: Mat A;
2348: PetscBT btv,btvt;
2349: PetscSection subSection;
2350: PetscInt *ids,cum,cump,*cids,*pids;
2352: DMPlexGetSubdomainSection(dm,&subSection);
2353: MatISGetLocalMat(pc->pmat,&A);
2354: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2355: PetscBTCreate(A->rmap->n,&btv);
2356: PetscBTCreate(A->rmap->n,&btvt);
2358: cids[0] = 0;
2359: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2360: PetscInt j;
2362: PetscBTMemzero(A->rmap->n,btvt);
2363: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2364: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2366: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2367: for (k = 0; k < 2*size; k += 2) {
2368: PetscInt s, p = closure[k], off, dof, cdof;
2370: PetscSectionGetConstraintDof(subSection, p, &cdof);
2371: PetscSectionGetOffset(subSection,p,&off);
2372: PetscSectionGetDof(subSection,p,&dof);
2373: for (s = 0; s < dof-cdof; s++) {
2374: if (PetscBTLookupSet(btvt,off+s)) continue;
2375: if (!PetscBTLookup(btv,off+s)) {
2376: ids[cum++] = off+s;
2377: } else { /* cross-vertex */
2378: pids[cump++] = off+s;
2379: }
2380: }
2381: }
2382: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2383: }
2384: cids[i+1] = cum;
2385: /* mark dofs as already assigned */
2386: for (j = cids[i]; j < cids[i+1]; j++) {
2387: PetscBTSet(btv,ids[j]);
2388: }
2389: }
2390: if (cc) {
2391: PetscMalloc1(graph->ncc,&cc_n);
2392: for (i = 0; i < graph->ncc; i++) {
2393: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2394: }
2395: *cc = cc_n;
2396: }
2397: if (primalv) {
2398: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2399: }
2400: PetscFree3(ids,cids,pids);
2401: PetscBTDestroy(&btv);
2402: PetscBTDestroy(&btvt);
2403: }
2404: } else {
2405: if (ncc) *ncc = graph->ncc;
2406: if (cc) {
2407: PetscMalloc1(graph->ncc,&cc_n);
2408: for (i=0;i<graph->ncc;i++) {
2409: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2410: }
2411: *cc = cc_n;
2412: }
2413: }
2414: /* clean up graph */
2415: graph->xadj = 0;
2416: graph->adjncy = 0;
2417: PCBDDCGraphDestroy(&graph);
2418: return(0);
2419: }
2421: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2422: {
2423: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2424: PC_IS* pcis = (PC_IS*)(pc->data);
2425: IS dirIS = NULL;
2426: PetscInt i;
2430: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2431: if (zerodiag) {
2432: Mat A;
2433: Vec vec3_N;
2434: PetscScalar *vals;
2435: const PetscInt *idxs;
2436: PetscInt nz,*count;
2438: /* p0 */
2439: VecSet(pcis->vec1_N,0.);
2440: PetscMalloc1(pcis->n,&vals);
2441: ISGetLocalSize(zerodiag,&nz);
2442: ISGetIndices(zerodiag,&idxs);
2443: for (i=0;i<nz;i++) vals[i] = 1.;
2444: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2445: VecAssemblyBegin(pcis->vec1_N);
2446: VecAssemblyEnd(pcis->vec1_N);
2447: /* v_I */
2448: VecSetRandom(pcis->vec2_N,NULL);
2449: for (i=0;i<nz;i++) vals[i] = 0.;
2450: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2451: ISRestoreIndices(zerodiag,&idxs);
2452: ISGetIndices(pcis->is_B_local,&idxs);
2453: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2454: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2455: ISRestoreIndices(pcis->is_B_local,&idxs);
2456: if (dirIS) {
2457: PetscInt n;
2459: ISGetLocalSize(dirIS,&n);
2460: ISGetIndices(dirIS,&idxs);
2461: for (i=0;i<n;i++) vals[i] = 0.;
2462: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2463: ISRestoreIndices(dirIS,&idxs);
2464: }
2465: VecAssemblyBegin(pcis->vec2_N);
2466: VecAssemblyEnd(pcis->vec2_N);
2467: VecDuplicate(pcis->vec1_N,&vec3_N);
2468: VecSet(vec3_N,0.);
2469: MatISGetLocalMat(pc->pmat,&A);
2470: MatMult(A,pcis->vec1_N,vec3_N);
2471: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2472: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2473: PetscFree(vals);
2474: VecDestroy(&vec3_N);
2476: /* there should not be any pressure dofs lying on the interface */
2477: PetscCalloc1(pcis->n,&count);
2478: ISGetIndices(pcis->is_B_local,&idxs);
2479: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2480: ISRestoreIndices(pcis->is_B_local,&idxs);
2481: ISGetIndices(zerodiag,&idxs);
2482: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]);
2483: ISRestoreIndices(zerodiag,&idxs);
2484: PetscFree(count);
2485: }
2486: ISDestroy(&dirIS);
2488: /* check PCBDDCBenignGetOrSetP0 */
2489: VecSetRandom(pcis->vec1_global,NULL);
2490: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2491: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2492: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2493: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2494: for (i=0;i<pcbddc->benign_n;i++) {
2495: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2496: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2497: }
2498: return(0);
2499: }
2501: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2502: {
2503: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2504: IS pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2505: PetscInt nz,n;
2506: PetscInt *interior_dofs,n_interior_dofs,nneu;
2507: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2511: PetscSFDestroy(&pcbddc->benign_sf);
2512: MatDestroy(&pcbddc->benign_B0);
2513: for (n=0;n<pcbddc->benign_n;n++) {
2514: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2515: }
2516: PetscFree(pcbddc->benign_zerodiag_subs);
2517: pcbddc->benign_n = 0;
2519: /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2520: otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2521: Checks if all the pressure dofs in each subdomain have a zero diagonal
2522: If not, a change of basis on pressures is not needed
2523: since the local Schur complements are already SPD
2524: */
2525: has_null_pressures = PETSC_TRUE;
2526: have_null = PETSC_TRUE;
2527: if (pcbddc->n_ISForDofsLocal) {
2528: IS iP = NULL;
2529: PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
2531: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2532: PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2533: PetscOptionsEnd();
2534: if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2535: /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2536: ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2537: ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2538: ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2539: ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2540: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2541: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2542: if (iP) {
2543: IS newpressures;
2545: ISDifference(pressures,iP,&newpressures);
2546: ISDestroy(&pressures);
2547: pressures = newpressures;
2548: }
2549: ISSorted(pressures,&sorted);
2550: if (!sorted) {
2551: ISSort(pressures);
2552: }
2553: } else {
2554: pressures = NULL;
2555: }
2556: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2557: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2558: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2559: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2560: ISSorted(zerodiag,&sorted);
2561: if (!sorted) {
2562: ISSort(zerodiag);
2563: }
2564: PetscObjectReference((PetscObject)zerodiag);
2565: zerodiag_save = zerodiag;
2566: ISGetLocalSize(zerodiag,&nz);
2567: if (!nz) {
2568: if (n) have_null = PETSC_FALSE;
2569: has_null_pressures = PETSC_FALSE;
2570: ISDestroy(&zerodiag);
2571: }
2572: recompute_zerodiag = PETSC_FALSE;
2573: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2574: zerodiag_subs = NULL;
2575: pcbddc->benign_n = 0;
2576: n_interior_dofs = 0;
2577: interior_dofs = NULL;
2578: nneu = 0;
2579: if (pcbddc->NeumannBoundariesLocal) {
2580: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2581: }
2582: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2583: if (checkb) { /* need to compute interior nodes */
2584: PetscInt n,i,j;
2585: PetscInt n_neigh,*neigh,*n_shared,**shared;
2586: PetscInt *iwork;
2588: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2589: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2590: PetscCalloc1(n,&iwork);
2591: PetscMalloc1(n,&interior_dofs);
2592: for (i=1;i<n_neigh;i++)
2593: for (j=0;j<n_shared[i];j++)
2594: iwork[shared[i][j]] += 1;
2595: for (i=0;i<n;i++)
2596: if (!iwork[i])
2597: interior_dofs[n_interior_dofs++] = i;
2598: PetscFree(iwork);
2599: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2600: }
2601: if (has_null_pressures) {
2602: IS *subs;
2603: PetscInt nsubs,i,j,nl;
2604: const PetscInt *idxs;
2605: PetscScalar *array;
2606: Vec *work;
2607: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2609: subs = pcbddc->local_subs;
2610: nsubs = pcbddc->n_local_subs;
2611: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2612: if (checkb) {
2613: VecDuplicateVecs(matis->y,2,&work);
2614: ISGetLocalSize(zerodiag,&nl);
2615: ISGetIndices(zerodiag,&idxs);
2616: /* work[0] = 1_p */
2617: VecSet(work[0],0.);
2618: VecGetArray(work[0],&array);
2619: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2620: VecRestoreArray(work[0],&array);
2621: /* work[0] = 1_v */
2622: VecSet(work[1],1.);
2623: VecGetArray(work[1],&array);
2624: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2625: VecRestoreArray(work[1],&array);
2626: ISRestoreIndices(zerodiag,&idxs);
2627: }
2628: if (nsubs > 1) {
2629: PetscCalloc1(nsubs,&zerodiag_subs);
2630: for (i=0;i<nsubs;i++) {
2631: ISLocalToGlobalMapping l2g;
2632: IS t_zerodiag_subs;
2633: PetscInt nl;
2635: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2636: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2637: ISGetLocalSize(t_zerodiag_subs,&nl);
2638: if (nl) {
2639: PetscBool valid = PETSC_TRUE;
2641: if (checkb) {
2642: VecSet(matis->x,0);
2643: ISGetLocalSize(subs[i],&nl);
2644: ISGetIndices(subs[i],&idxs);
2645: VecGetArray(matis->x,&array);
2646: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2647: VecRestoreArray(matis->x,&array);
2648: ISRestoreIndices(subs[i],&idxs);
2649: VecPointwiseMult(matis->x,work[0],matis->x);
2650: MatMult(matis->A,matis->x,matis->y);
2651: VecPointwiseMult(matis->y,work[1],matis->y);
2652: VecGetArray(matis->y,&array);
2653: for (j=0;j<n_interior_dofs;j++) {
2654: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2655: valid = PETSC_FALSE;
2656: break;
2657: }
2658: }
2659: VecRestoreArray(matis->y,&array);
2660: }
2661: if (valid && nneu) {
2662: const PetscInt *idxs;
2663: PetscInt nzb;
2665: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2666: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2667: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2668: if (nzb) valid = PETSC_FALSE;
2669: }
2670: if (valid && pressures) {
2671: IS t_pressure_subs;
2672: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2673: ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2674: ISDestroy(&t_pressure_subs);
2675: }
2676: if (valid) {
2677: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2678: pcbddc->benign_n++;
2679: } else {
2680: recompute_zerodiag = PETSC_TRUE;
2681: }
2682: }
2683: ISDestroy(&t_zerodiag_subs);
2684: ISLocalToGlobalMappingDestroy(&l2g);
2685: }
2686: } else { /* there's just one subdomain (or zero if they have not been detected */
2687: PetscBool valid = PETSC_TRUE;
2689: if (nneu) valid = PETSC_FALSE;
2690: if (valid && pressures) {
2691: ISEqual(pressures,zerodiag,&valid);
2692: }
2693: if (valid && checkb) {
2694: MatMult(matis->A,work[0],matis->x);
2695: VecPointwiseMult(matis->x,work[1],matis->x);
2696: VecGetArray(matis->x,&array);
2697: for (j=0;j<n_interior_dofs;j++) {
2698: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2699: valid = PETSC_FALSE;
2700: break;
2701: }
2702: }
2703: VecRestoreArray(matis->x,&array);
2704: }
2705: if (valid) {
2706: pcbddc->benign_n = 1;
2707: PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2708: PetscObjectReference((PetscObject)zerodiag);
2709: zerodiag_subs[0] = zerodiag;
2710: }
2711: }
2712: if (checkb) {
2713: VecDestroyVecs(2,&work);
2714: }
2715: }
2716: PetscFree(interior_dofs);
2718: if (!pcbddc->benign_n) {
2719: PetscInt n;
2721: ISDestroy(&zerodiag);
2722: recompute_zerodiag = PETSC_FALSE;
2723: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2724: if (n) {
2725: has_null_pressures = PETSC_FALSE;
2726: have_null = PETSC_FALSE;
2727: }
2728: }
2730: /* final check for null pressures */
2731: if (zerodiag && pressures) {
2732: PetscInt nz,np;
2733: ISGetLocalSize(zerodiag,&nz);
2734: ISGetLocalSize(pressures,&np);
2735: if (nz != np) have_null = PETSC_FALSE;
2736: }
2738: if (recompute_zerodiag) {
2739: ISDestroy(&zerodiag);
2740: if (pcbddc->benign_n == 1) {
2741: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2742: zerodiag = zerodiag_subs[0];
2743: } else {
2744: PetscInt i,nzn,*new_idxs;
2746: nzn = 0;
2747: for (i=0;i<pcbddc->benign_n;i++) {
2748: PetscInt ns;
2749: ISGetLocalSize(zerodiag_subs[i],&ns);
2750: nzn += ns;
2751: }
2752: PetscMalloc1(nzn,&new_idxs);
2753: nzn = 0;
2754: for (i=0;i<pcbddc->benign_n;i++) {
2755: PetscInt ns,*idxs;
2756: ISGetLocalSize(zerodiag_subs[i],&ns);
2757: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2758: PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2759: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2760: nzn += ns;
2761: }
2762: PetscSortInt(nzn,new_idxs);
2763: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2764: }
2765: have_null = PETSC_FALSE;
2766: }
2768: /* Prepare matrix to compute no-net-flux */
2769: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2770: Mat A,loc_divudotp;
2771: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2772: IS row,col,isused = NULL;
2773: PetscInt M,N,n,st,n_isused;
2775: if (pressures) {
2776: isused = pressures;
2777: } else {
2778: isused = zerodiag_save;
2779: }
2780: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2781: MatISGetLocalMat(pc->pmat,&A);
2782: MatGetLocalSize(A,&n,NULL);
2783: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2784: n_isused = 0;
2785: if (isused) {
2786: ISGetLocalSize(isused,&n_isused);
2787: }
2788: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2789: st = st-n_isused;
2790: if (n) {
2791: const PetscInt *gidxs;
2793: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2794: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2795: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2796: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2797: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2798: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2799: } else {
2800: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2801: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2802: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2803: }
2804: MatGetSize(pc->pmat,NULL,&N);
2805: ISGetSize(row,&M);
2806: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2807: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2808: ISDestroy(&row);
2809: ISDestroy(&col);
2810: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2811: MatSetType(pcbddc->divudotp,MATIS);
2812: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2813: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2814: ISLocalToGlobalMappingDestroy(&rl2g);
2815: ISLocalToGlobalMappingDestroy(&cl2g);
2816: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2817: MatDestroy(&loc_divudotp);
2818: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2819: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2820: }
2821: ISDestroy(&zerodiag_save);
2823: /* change of basis and p0 dofs */
2824: if (has_null_pressures) {
2825: IS zerodiagc;
2826: const PetscInt *idxs,*idxsc;
2827: PetscInt i,s,*nnz;
2829: ISGetLocalSize(zerodiag,&nz);
2830: ISComplement(zerodiag,0,n,&zerodiagc);
2831: ISGetIndices(zerodiagc,&idxsc);
2832: /* local change of basis for pressures */
2833: MatDestroy(&pcbddc->benign_change);
2834: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2835: MatSetType(pcbddc->benign_change,MATAIJ);
2836: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2837: PetscMalloc1(n,&nnz);
2838: for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2839: for (i=0;i<pcbddc->benign_n;i++) {
2840: PetscInt nzs,j;
2842: ISGetLocalSize(zerodiag_subs[i],&nzs);
2843: ISGetIndices(zerodiag_subs[i],&idxs);
2844: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2845: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2846: ISRestoreIndices(zerodiag_subs[i],&idxs);
2847: }
2848: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2849: PetscFree(nnz);
2850: /* set identity on velocities */
2851: for (i=0;i<n-nz;i++) {
2852: MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2853: }
2854: ISRestoreIndices(zerodiagc,&idxsc);
2855: ISDestroy(&zerodiagc);
2856: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2857: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2858: /* set change on pressures */
2859: for (s=0;s<pcbddc->benign_n;s++) {
2860: PetscScalar *array;
2861: PetscInt nzs;
2863: ISGetLocalSize(zerodiag_subs[s],&nzs);
2864: ISGetIndices(zerodiag_subs[s],&idxs);
2865: for (i=0;i<nzs-1;i++) {
2866: PetscScalar vals[2];
2867: PetscInt cols[2];
2869: cols[0] = idxs[i];
2870: cols[1] = idxs[nzs-1];
2871: vals[0] = 1.;
2872: vals[1] = 1.;
2873: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2874: }
2875: PetscMalloc1(nzs,&array);
2876: for (i=0;i<nzs-1;i++) array[i] = -1.;
2877: array[nzs-1] = 1.;
2878: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2879: /* store local idxs for p0 */
2880: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2881: ISRestoreIndices(zerodiag_subs[s],&idxs);
2882: PetscFree(array);
2883: }
2884: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2885: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2886: /* project if needed */
2887: if (pcbddc->benign_change_explicit) {
2888: Mat M;
2890: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2891: MatDestroy(&pcbddc->local_mat);
2892: MatSeqAIJCompress(M,&pcbddc->local_mat);
2893: MatDestroy(&M);
2894: }
2895: /* store global idxs for p0 */
2896: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2897: }
2898: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2899: ISDestroy(&pressures);
2901: /* determines if the coarse solver will be singular or not */
2902: MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2903: /* determines if the problem has subdomains with 0 pressure block */
2904: have_null = (PetscBool)(!!pcbddc->benign_n);
2905: MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2906: *zerodiaglocal = zerodiag;
2907: return(0);
2908: }
2910: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2911: {
2912: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2913: PetscScalar *array;
2917: if (!pcbddc->benign_sf) {
2918: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2919: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2920: }
2921: if (get) {
2922: VecGetArrayRead(v,(const PetscScalar**)&array);
2923: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2924: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2925: VecRestoreArrayRead(v,(const PetscScalar**)&array);
2926: } else {
2927: VecGetArray(v,&array);
2928: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2929: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2930: VecRestoreArray(v,&array);
2931: }
2932: return(0);
2933: }
2935: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2936: {
2937: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2941: /* TODO: add error checking
2942: - avoid nested pop (or push) calls.
2943: - cannot push before pop.
2944: - cannot call this if pcbddc->local_mat is NULL
2945: */
2946: if (!pcbddc->benign_n) {
2947: return(0);
2948: }
2949: if (pop) {
2950: if (pcbddc->benign_change_explicit) {
2951: IS is_p0;
2952: MatReuse reuse;
2954: /* extract B_0 */
2955: reuse = MAT_INITIAL_MATRIX;
2956: if (pcbddc->benign_B0) {
2957: reuse = MAT_REUSE_MATRIX;
2958: }
2959: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2960: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2961: /* remove rows and cols from local problem */
2962: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2963: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2964: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2965: ISDestroy(&is_p0);
2966: } else {
2967: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2968: PetscScalar *vals;
2969: PetscInt i,n,*idxs_ins;
2971: VecGetLocalSize(matis->y,&n);
2972: PetscMalloc2(n,&idxs_ins,n,&vals);
2973: if (!pcbddc->benign_B0) {
2974: PetscInt *nnz;
2975: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2976: MatSetType(pcbddc->benign_B0,MATAIJ);
2977: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2978: PetscMalloc1(pcbddc->benign_n,&nnz);
2979: for (i=0;i<pcbddc->benign_n;i++) {
2980: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2981: nnz[i] = n - nnz[i];
2982: }
2983: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2984: PetscFree(nnz);
2985: }
2987: for (i=0;i<pcbddc->benign_n;i++) {
2988: PetscScalar *array;
2989: PetscInt *idxs,j,nz,cum;
2991: VecSet(matis->x,0.);
2992: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2993: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2994: for (j=0;j<nz;j++) vals[j] = 1.;
2995: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2996: VecAssemblyBegin(matis->x);
2997: VecAssemblyEnd(matis->x);
2998: VecSet(matis->y,0.);
2999: MatMult(matis->A,matis->x,matis->y);
3000: VecGetArray(matis->y,&array);
3001: cum = 0;
3002: for (j=0;j<n;j++) {
3003: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3004: vals[cum] = array[j];
3005: idxs_ins[cum] = j;
3006: cum++;
3007: }
3008: }
3009: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3010: VecRestoreArray(matis->y,&array);
3011: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3012: }
3013: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3014: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3015: PetscFree2(idxs_ins,vals);
3016: }
3017: } else { /* push */
3018: if (pcbddc->benign_change_explicit) {
3019: PetscInt i;
3021: for (i=0;i<pcbddc->benign_n;i++) {
3022: PetscScalar *B0_vals;
3023: PetscInt *B0_cols,B0_ncol;
3025: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3026: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3027: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3028: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3029: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3030: }
3031: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3032: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3033: } else {
3034: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n");
3035: }
3036: }
3037: return(0);
3038: }
3040: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3041: {
3042: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3043: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3044: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3045: PetscBLASInt *B_iwork,*B_ifail;
3046: PetscScalar *work,lwork;
3047: PetscScalar *St,*S,*eigv;
3048: PetscScalar *Sarray,*Starray;
3049: PetscReal *eigs,thresh,lthresh,uthresh;
3050: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3051: PetscBool allocated_S_St;
3052: #if defined(PETSC_USE_COMPLEX)
3053: PetscReal *rwork;
3054: #endif
3055: PetscErrorCode ierr;
3058: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3059: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3060: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3062: if (pcbddc->dbg_flag) {
3063: PetscViewerFlush(pcbddc->dbg_viewer);
3064: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3065: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3066: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3067: }
3069: if (pcbddc->dbg_flag) {
3070: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3071: }
3073: /* max size of subsets */
3074: mss = 0;
3075: for (i=0;i<sub_schurs->n_subs;i++) {
3076: PetscInt subset_size;
3078: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3079: mss = PetscMax(mss,subset_size);
3080: }
3082: /* min/max and threshold */
3083: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3084: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3085: nmax = PetscMax(nmin,nmax);
3086: allocated_S_St = PETSC_FALSE;
3087: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3088: allocated_S_St = PETSC_TRUE;
3089: }
3091: /* allocate lapack workspace */
3092: cum = cum2 = 0;
3093: maxneigs = 0;
3094: for (i=0;i<sub_schurs->n_subs;i++) {
3095: PetscInt n,subset_size;
3097: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3098: n = PetscMin(subset_size,nmax);
3099: cum += subset_size;
3100: cum2 += subset_size*n;
3101: maxneigs = PetscMax(maxneigs,n);
3102: }
3103: if (mss) {
3104: if (sub_schurs->is_symmetric) {
3105: PetscBLASInt B_itype = 1;
3106: PetscBLASInt B_N = mss;
3107: PetscReal zero = 0.0;
3108: PetscReal eps = 0.0; /* dlamch? */
3110: B_lwork = -1;
3111: S = NULL;
3112: St = NULL;
3113: eigs = NULL;
3114: eigv = NULL;
3115: B_iwork = NULL;
3116: B_ifail = NULL;
3117: #if defined(PETSC_USE_COMPLEX)
3118: rwork = NULL;
3119: #endif
3120: thresh = 1.0;
3121: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3122: #if defined(PETSC_USE_COMPLEX)
3123: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3124: #else
3125: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3126: #endif
3127: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3128: PetscFPTrapPop();
3129: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3130: } else {
3131: lwork = 0;
3132: }
3134: nv = 0;
3135: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3136: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3137: }
3138: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3139: if (allocated_S_St) {
3140: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3141: }
3142: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3143: #if defined(PETSC_USE_COMPLEX)
3144: PetscMalloc1(7*mss,&rwork);
3145: #endif
3146: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3147: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3148: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3149: nv+cum,&pcbddc->adaptive_constraints_idxs,
3150: nv+cum2,&pcbddc->adaptive_constraints_data);
3151: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
3153: maxneigs = 0;
3154: cum = cumarray = 0;
3155: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3156: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3157: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3158: const PetscInt *idxs;
3160: ISGetIndices(sub_schurs->is_vertices,&idxs);
3161: for (cum=0;cum<nv;cum++) {
3162: pcbddc->adaptive_constraints_n[cum] = 1;
3163: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3164: pcbddc->adaptive_constraints_data[cum] = 1.0;
3165: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3166: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3167: }
3168: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3169: }
3171: if (mss) { /* multilevel */
3172: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3173: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3174: }
3176: lthresh = pcbddc->adaptive_threshold[0];
3177: uthresh = pcbddc->adaptive_threshold[1];
3178: for (i=0;i<sub_schurs->n_subs;i++) {
3179: const PetscInt *idxs;
3180: PetscReal upper,lower;
3181: PetscInt j,subset_size,eigs_start = 0;
3182: PetscBLASInt B_N;
3183: PetscBool same_data = PETSC_FALSE;
3184: PetscBool scal = PETSC_FALSE;
3186: if (pcbddc->use_deluxe_scaling) {
3187: upper = PETSC_MAX_REAL;
3188: lower = uthresh;
3189: } else {
3190: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3191: upper = 1./uthresh;
3192: lower = 0.;
3193: }
3194: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3195: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3196: PetscBLASIntCast(subset_size,&B_N);
3197: /* this is experimental: we assume the dofs have been properly grouped to have
3198: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3199: if (!sub_schurs->is_posdef) {
3200: Mat T;
3202: for (j=0;j<subset_size;j++) {
3203: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3204: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3205: MatScale(T,-1.0);
3206: MatDestroy(&T);
3207: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3208: MatScale(T,-1.0);
3209: MatDestroy(&T);
3210: if (sub_schurs->change_primal_sub) {
3211: PetscInt nz,k;
3212: const PetscInt *idxs;
3214: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3215: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3216: for (k=0;k<nz;k++) {
3217: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3218: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3219: }
3220: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3221: }
3222: scal = PETSC_TRUE;
3223: break;
3224: }
3225: }
3226: }
3228: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3229: if (sub_schurs->is_symmetric) {
3230: PetscInt j,k;
3231: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3232: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3233: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3234: }
3235: for (j=0;j<subset_size;j++) {
3236: for (k=j;k<subset_size;k++) {
3237: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3238: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3239: }
3240: }
3241: } else {
3242: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3243: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3244: }
3245: } else {
3246: S = Sarray + cumarray;
3247: St = Starray + cumarray;
3248: }
3249: /* see if we can save some work */
3250: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3251: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3252: }
3254: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3255: B_neigs = 0;
3256: } else {
3257: if (sub_schurs->is_symmetric) {
3258: PetscBLASInt B_itype = 1;
3259: PetscBLASInt B_IL, B_IU;
3260: PetscReal eps = -1.0; /* dlamch? */
3261: PetscInt nmin_s;
3262: PetscBool compute_range;
3264: B_neigs = 0;
3265: compute_range = (PetscBool)!same_data;
3266: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3268: if (pcbddc->dbg_flag) {
3269: PetscInt nc = 0;
3271: if (sub_schurs->change_primal_sub) {
3272: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3273: }
3274: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3275: }
3277: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3278: if (compute_range) {
3280: /* ask for eigenvalues larger than thresh */
3281: if (sub_schurs->is_posdef) {
3282: #if defined(PETSC_USE_COMPLEX)
3283: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3284: #else
3285: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3286: #endif
3287: } else { /* no theory so far, but it works nicely */
3288: PetscInt recipe = 0,recipe_m = 1;
3289: PetscReal bb[2];
3291: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3292: switch (recipe) {
3293: case 0:
3294: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3295: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3296: #if defined(PETSC_USE_COMPLEX)
3297: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3298: #else
3299: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3300: #endif
3301: break;
3302: case 1:
3303: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3304: #if defined(PETSC_USE_COMPLEX)
3305: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3306: #else
3307: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3308: #endif
3309: if (!scal) {
3310: PetscBLASInt B_neigs2 = 0;
3312: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3313: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3314: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3315: #if defined(PETSC_USE_COMPLEX)
3316: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3317: #else
3318: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3319: #endif
3320: B_neigs += B_neigs2;
3321: }
3322: break;
3323: case 2:
3324: if (scal) {
3325: bb[0] = PETSC_MIN_REAL;
3326: bb[1] = 0;
3327: #if defined(PETSC_USE_COMPLEX)
3328: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3329: #else
3330: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3331: #endif
3332: } else {
3333: PetscBLASInt B_neigs2 = 0;
3334: PetscBool import = PETSC_FALSE;
3336: lthresh = PetscMax(lthresh,0.0);
3337: if (lthresh > 0.0) {
3338: bb[0] = PETSC_MIN_REAL;
3339: bb[1] = lthresh*lthresh;
3341: import = PETSC_TRUE;
3342: #if defined(PETSC_USE_COMPLEX)
3343: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3344: #else
3345: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3346: #endif
3347: }
3348: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3349: bb[1] = PETSC_MAX_REAL;
3350: if (import) {
3351: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3352: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3353: }
3354: #if defined(PETSC_USE_COMPLEX)
3355: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3356: #else
3357: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3358: #endif
3359: B_neigs += B_neigs2;
3360: }
3361: break;
3362: case 3:
3363: if (scal) {
3364: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3365: } else {
3366: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3367: }
3368: if (!scal) {
3369: bb[0] = uthresh;
3370: bb[1] = PETSC_MAX_REAL;
3371: #if defined(PETSC_USE_COMPLEX)
3372: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3373: #else
3374: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3375: #endif
3376: }
3377: if (recipe_m > 0 && B_N - B_neigs > 0) {
3378: PetscBLASInt B_neigs2 = 0;
3380: B_IL = 1;
3381: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3382: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3383: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3384: #if defined(PETSC_USE_COMPLEX)
3385: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3386: #else
3387: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3388: #endif
3389: B_neigs += B_neigs2;
3390: }
3391: break;
3392: case 4:
3393: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3394: #if defined(PETSC_USE_COMPLEX)
3395: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3396: #else
3397: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3398: #endif
3399: {
3400: PetscBLASInt B_neigs2 = 0;
3402: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3403: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3404: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3405: #if defined(PETSC_USE_COMPLEX)
3406: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3407: #else
3408: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3409: #endif
3410: B_neigs += B_neigs2;
3411: }
3412: break;
3413: case 5: /* same as before: first compute all eigenvalues, then filter */
3414: #if defined(PETSC_USE_COMPLEX)
3415: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3416: #else
3417: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3418: #endif
3419: {
3420: PetscInt e,k,ne;
3421: for (e=0,ne=0;e<B_neigs;e++) {
3422: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3423: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3424: eigs[ne] = eigs[e];
3425: ne++;
3426: }
3427: }
3428: PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3429: B_neigs = ne;
3430: }
3431: break;
3432: default:
3433: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3434: break;
3435: }
3436: }
3437: } else if (!same_data) { /* this is just to see all the eigenvalues */
3438: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3439: B_IL = 1;
3440: #if defined(PETSC_USE_COMPLEX)
3441: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3442: #else
3443: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3444: #endif
3445: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3446: PetscInt k;
3447: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3448: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3449: PetscBLASIntCast(nmax,&B_neigs);
3450: nmin = nmax;
3451: PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3452: for (k=0;k<nmax;k++) {
3453: eigs[k] = 1./PETSC_SMALL;
3454: eigv[k*(subset_size+1)] = 1.0;
3455: }
3456: }
3457: PetscFPTrapPop();
3458: if (B_ierr) {
3459: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3460: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3461: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3462: }
3464: if (B_neigs > nmax) {
3465: if (pcbddc->dbg_flag) {
3466: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
3467: }
3468: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3469: B_neigs = nmax;
3470: }
3472: nmin_s = PetscMin(nmin,B_N);
3473: if (B_neigs < nmin_s) {
3474: PetscBLASInt B_neigs2 = 0;
3476: if (pcbddc->use_deluxe_scaling) {
3477: if (scal) {
3478: B_IU = nmin_s;
3479: B_IL = B_neigs + 1;
3480: } else {
3481: B_IL = B_N - nmin_s + 1;
3482: B_IU = B_N - B_neigs;
3483: }
3484: } else {
3485: B_IL = B_neigs + 1;
3486: B_IU = nmin_s;
3487: }
3488: if (pcbddc->dbg_flag) {
3489: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3490: }
3491: if (sub_schurs->is_symmetric) {
3492: PetscInt j,k;
3493: for (j=0;j<subset_size;j++) {
3494: for (k=j;k<subset_size;k++) {
3495: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3496: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3497: }
3498: }
3499: } else {
3500: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3501: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3502: }
3503: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3504: #if defined(PETSC_USE_COMPLEX)
3505: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3506: #else
3507: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3508: #endif
3509: PetscFPTrapPop();
3510: B_neigs += B_neigs2;
3511: }
3512: if (B_ierr) {
3513: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3514: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3515: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3516: }
3517: if (pcbddc->dbg_flag) {
3518: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3519: for (j=0;j<B_neigs;j++) {
3520: if (eigs[j] == 0.0) {
3521: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3522: } else {
3523: if (pcbddc->use_deluxe_scaling) {
3524: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3525: } else {
3526: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3527: }
3528: }
3529: }
3530: }
3531: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3532: }
3533: /* change the basis back to the original one */
3534: if (sub_schurs->change) {
3535: Mat change,phi,phit;
3537: if (pcbddc->dbg_flag > 2) {
3538: PetscInt ii;
3539: for (ii=0;ii<B_neigs;ii++) {
3540: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3541: for (j=0;j<B_N;j++) {
3542: #if defined(PETSC_USE_COMPLEX)
3543: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3544: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3545: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3546: #else
3547: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3548: #endif
3549: }
3550: }
3551: }
3552: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3553: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3554: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3555: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3556: MatDestroy(&phit);
3557: MatDestroy(&phi);
3558: }
3559: maxneigs = PetscMax(B_neigs,maxneigs);
3560: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3561: if (B_neigs) {
3562: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
3564: if (pcbddc->dbg_flag > 1) {
3565: PetscInt ii;
3566: for (ii=0;ii<B_neigs;ii++) {
3567: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3568: for (j=0;j<B_N;j++) {
3569: #if defined(PETSC_USE_COMPLEX)
3570: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3571: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3572: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3573: #else
3574: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3575: #endif
3576: }
3577: }
3578: }
3579: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3580: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3581: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3582: cum++;
3583: }
3584: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3585: /* shift for next computation */
3586: cumarray += subset_size*subset_size;
3587: }
3588: if (pcbddc->dbg_flag) {
3589: PetscViewerFlush(pcbddc->dbg_viewer);
3590: }
3592: if (mss) {
3593: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3594: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3595: /* destroy matrices (junk) */
3596: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3597: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3598: }
3599: if (allocated_S_St) {
3600: PetscFree2(S,St);
3601: }
3602: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3603: #if defined(PETSC_USE_COMPLEX)
3604: PetscFree(rwork);
3605: #endif
3606: if (pcbddc->dbg_flag) {
3607: PetscInt maxneigs_r;
3608: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3609: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
3610: }
3611: return(0);
3612: }
3614: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3615: {
3616: PetscScalar *coarse_submat_vals;
3620: /* Setup local scatters R_to_B and (optionally) R_to_D */
3621: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3622: PCBDDCSetUpLocalScatters(pc);
3624: /* Setup local neumann solver ksp_R */
3625: /* PCBDDCSetUpLocalScatters should be called first! */
3626: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3628: /*
3629: Setup local correction and local part of coarse basis.
3630: Gives back the dense local part of the coarse matrix in column major ordering
3631: */
3632: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3634: /* Compute total number of coarse nodes and setup coarse solver */
3635: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3637: /* free */
3638: PetscFree(coarse_submat_vals);
3639: return(0);
3640: }
3642: PetscErrorCode PCBDDCResetCustomization(PC pc)
3643: {
3644: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3648: ISDestroy(&pcbddc->user_primal_vertices);
3649: ISDestroy(&pcbddc->user_primal_vertices_local);
3650: ISDestroy(&pcbddc->NeumannBoundaries);
3651: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3652: ISDestroy(&pcbddc->DirichletBoundaries);
3653: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3654: PetscFree(pcbddc->onearnullvecs_state);
3655: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3656: PCBDDCSetDofsSplitting(pc,0,NULL);
3657: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3658: return(0);
3659: }
3661: PetscErrorCode PCBDDCResetTopography(PC pc)
3662: {
3663: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3664: PetscInt i;
3668: MatDestroy(&pcbddc->nedcG);
3669: ISDestroy(&pcbddc->nedclocal);
3670: MatDestroy(&pcbddc->discretegradient);
3671: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3672: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3673: MatDestroy(&pcbddc->switch_static_change);
3674: VecDestroy(&pcbddc->work_change);
3675: MatDestroy(&pcbddc->ConstraintMatrix);
3676: MatDestroy(&pcbddc->divudotp);
3677: ISDestroy(&pcbddc->divudotp_vl2l);
3678: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3679: for (i=0;i<pcbddc->n_local_subs;i++) {
3680: ISDestroy(&pcbddc->local_subs[i]);
3681: }
3682: pcbddc->n_local_subs = 0;
3683: PetscFree(pcbddc->local_subs);
3684: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3685: pcbddc->graphanalyzed = PETSC_FALSE;
3686: pcbddc->recompute_topography = PETSC_TRUE;
3687: pcbddc->corner_selected = PETSC_FALSE;
3688: return(0);
3689: }
3691: PetscErrorCode PCBDDCResetSolvers(PC pc)
3692: {
3693: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3697: VecDestroy(&pcbddc->coarse_vec);
3698: if (pcbddc->coarse_phi_B) {
3699: PetscScalar *array;
3700: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3701: PetscFree(array);
3702: }
3703: MatDestroy(&pcbddc->coarse_phi_B);
3704: MatDestroy(&pcbddc->coarse_phi_D);
3705: MatDestroy(&pcbddc->coarse_psi_B);
3706: MatDestroy(&pcbddc->coarse_psi_D);
3707: VecDestroy(&pcbddc->vec1_P);
3708: VecDestroy(&pcbddc->vec1_C);
3709: MatDestroy(&pcbddc->local_auxmat2);
3710: MatDestroy(&pcbddc->local_auxmat1);
3711: VecDestroy(&pcbddc->vec1_R);
3712: VecDestroy(&pcbddc->vec2_R);
3713: ISDestroy(&pcbddc->is_R_local);
3714: VecScatterDestroy(&pcbddc->R_to_B);
3715: VecScatterDestroy(&pcbddc->R_to_D);
3716: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3717: KSPReset(pcbddc->ksp_D);
3718: KSPReset(pcbddc->ksp_R);
3719: KSPReset(pcbddc->coarse_ksp);
3720: MatDestroy(&pcbddc->local_mat);
3721: PetscFree(pcbddc->primal_indices_local_idxs);
3722: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3723: PetscFree(pcbddc->global_primal_indices);
3724: ISDestroy(&pcbddc->coarse_subassembling);
3725: MatDestroy(&pcbddc->benign_change);
3726: VecDestroy(&pcbddc->benign_vec);
3727: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3728: MatDestroy(&pcbddc->benign_B0);
3729: PetscSFDestroy(&pcbddc->benign_sf);
3730: if (pcbddc->benign_zerodiag_subs) {
3731: PetscInt i;
3732: for (i=0;i<pcbddc->benign_n;i++) {
3733: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3734: }
3735: PetscFree(pcbddc->benign_zerodiag_subs);
3736: }
3737: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3738: return(0);
3739: }
3741: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3742: {
3743: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3744: PC_IS *pcis = (PC_IS*)pc->data;
3745: VecType impVecType;
3746: PetscInt n_constraints,n_R,old_size;
3750: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3751: n_R = pcis->n - pcbddc->n_vertices;
3752: VecGetType(pcis->vec1_N,&impVecType);
3753: /* local work vectors (try to avoid unneeded work)*/
3754: /* R nodes */
3755: old_size = -1;
3756: if (pcbddc->vec1_R) {
3757: VecGetSize(pcbddc->vec1_R,&old_size);
3758: }
3759: if (n_R != old_size) {
3760: VecDestroy(&pcbddc->vec1_R);
3761: VecDestroy(&pcbddc->vec2_R);
3762: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3763: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3764: VecSetType(pcbddc->vec1_R,impVecType);
3765: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3766: }
3767: /* local primal dofs */
3768: old_size = -1;
3769: if (pcbddc->vec1_P) {
3770: VecGetSize(pcbddc->vec1_P,&old_size);
3771: }
3772: if (pcbddc->local_primal_size != old_size) {
3773: VecDestroy(&pcbddc->vec1_P);
3774: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3775: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3776: VecSetType(pcbddc->vec1_P,impVecType);
3777: }
3778: /* local explicit constraints */
3779: old_size = -1;
3780: if (pcbddc->vec1_C) {
3781: VecGetSize(pcbddc->vec1_C,&old_size);
3782: }
3783: if (n_constraints && n_constraints != old_size) {
3784: VecDestroy(&pcbddc->vec1_C);
3785: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3786: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3787: VecSetType(pcbddc->vec1_C,impVecType);
3788: }
3789: return(0);
3790: }
3792: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3793: {
3794: PetscErrorCode ierr;
3795: /* pointers to pcis and pcbddc */
3796: PC_IS* pcis = (PC_IS*)pc->data;
3797: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3798: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3799: /* submatrices of local problem */
3800: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3801: /* submatrices of local coarse problem */
3802: Mat S_VV,S_CV,S_VC,S_CC;
3803: /* working matrices */
3804: Mat C_CR;
3805: /* additional working stuff */
3806: PC pc_R;
3807: Mat F,Brhs = NULL;
3808: Vec dummy_vec;
3809: PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3810: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3811: PetscScalar *work;
3812: PetscInt *idx_V_B;
3813: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3814: PetscInt i,n_R,n_D,n_B;
3816: /* some shortcuts to scalars */
3817: PetscScalar one=1.0,m_one=-1.0;
3820: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3822: /* Set Non-overlapping dimensions */
3823: n_vertices = pcbddc->n_vertices;
3824: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3825: n_B = pcis->n_B;
3826: n_D = pcis->n - n_B;
3827: n_R = pcis->n - n_vertices;
3829: /* vertices in boundary numbering */
3830: PetscMalloc1(n_vertices,&idx_V_B);
3831: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3832: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);
3834: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3835: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3836: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3837: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3838: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3839: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3840: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3841: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3842: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3843: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3845: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3846: KSPGetPC(pcbddc->ksp_R,&pc_R);
3847: PCSetUp(pc_R);
3848: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3849: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3850: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3851: lda_rhs = n_R;
3852: need_benign_correction = PETSC_FALSE;
3853: if (isLU || isILU || isCHOL) {
3854: PCFactorGetMatrix(pc_R,&F);
3855: } else if (sub_schurs && sub_schurs->reuse_solver) {
3856: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3857: MatFactorType type;
3859: F = reuse_solver->F;
3860: MatGetFactorType(F,&type);
3861: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3862: MatGetSize(F,&lda_rhs,NULL);
3863: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3864: } else {
3865: F = NULL;
3866: }
3868: /* determine if we can use a sparse right-hand side */
3869: sparserhs = PETSC_FALSE;
3870: if (F) {
3871: MatSolverType solver;
3873: MatFactorGetSolverType(F,&solver);
3874: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3875: }
3877: /* allocate workspace */
3878: n = 0;
3879: if (n_constraints) {
3880: n += lda_rhs*n_constraints;
3881: }
3882: if (n_vertices) {
3883: n = PetscMax(2*lda_rhs*n_vertices,n);
3884: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3885: }
3886: if (!pcbddc->symmetric_primal) {
3887: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3888: }
3889: PetscMalloc1(n,&work);
3891: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3892: dummy_vec = NULL;
3893: if (need_benign_correction && lda_rhs != n_R && F) {
3894: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);
3895: }
3897: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3898: if (n_constraints) {
3899: Mat M3,C_B;
3900: IS is_aux;
3901: PetscScalar *array,*array2;
3903: MatDestroy(&pcbddc->local_auxmat1);
3904: MatDestroy(&pcbddc->local_auxmat2);
3906: /* Extract constraints on R nodes: C_{CR} */
3907: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3908: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3909: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
3911: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3912: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3913: if (!sparserhs) {
3914: PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3915: for (i=0;i<n_constraints;i++) {
3916: const PetscScalar *row_cmat_values;
3917: const PetscInt *row_cmat_indices;
3918: PetscInt size_of_constraint,j;
3920: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3921: for (j=0;j<size_of_constraint;j++) {
3922: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3923: }
3924: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3925: }
3926: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3927: } else {
3928: Mat tC_CR;
3930: MatScale(C_CR,-1.0);
3931: if (lda_rhs != n_R) {
3932: PetscScalar *aa;
3933: PetscInt r,*ii,*jj;
3934: PetscBool done;
3936: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3937: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3938: MatSeqAIJGetArray(C_CR,&aa);
3939: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3940: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3941: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3942: } else {
3943: PetscObjectReference((PetscObject)C_CR);
3944: tC_CR = C_CR;
3945: }
3946: MatCreateTranspose(tC_CR,&Brhs);
3947: MatDestroy(&tC_CR);
3948: }
3949: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3950: if (F) {
3951: if (need_benign_correction) {
3952: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3954: /* rhs is already zero on interior dofs, no need to change the rhs */
3955: PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3956: }
3957: MatMatSolve(F,Brhs,local_auxmat2_R);
3958: if (need_benign_correction) {
3959: PetscScalar *marr;
3960: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3962: MatDenseGetArray(local_auxmat2_R,&marr);
3963: if (lda_rhs != n_R) {
3964: for (i=0;i<n_constraints;i++) {
3965: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3966: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3967: VecResetArray(dummy_vec);
3968: }
3969: } else {
3970: for (i=0;i<n_constraints;i++) {
3971: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3972: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3973: VecResetArray(pcbddc->vec1_R);
3974: }
3975: }
3976: MatDenseRestoreArray(local_auxmat2_R,&marr);
3977: }
3978: } else {
3979: PetscScalar *marr;
3981: MatDenseGetArray(local_auxmat2_R,&marr);
3982: for (i=0;i<n_constraints;i++) {
3983: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3984: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3985: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3986: VecResetArray(pcbddc->vec1_R);
3987: VecResetArray(pcbddc->vec2_R);
3988: }
3989: MatDenseRestoreArray(local_auxmat2_R,&marr);
3990: }
3991: if (sparserhs) {
3992: MatScale(C_CR,-1.0);
3993: }
3994: MatDestroy(&Brhs);
3995: if (!pcbddc->switch_static) {
3996: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3997: MatDenseGetArray(pcbddc->local_auxmat2,&array);
3998: MatDenseGetArray(local_auxmat2_R,&array2);
3999: for (i=0;i<n_constraints;i++) {
4000: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4001: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4002: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4003: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4004: VecResetArray(pcis->vec1_B);
4005: VecResetArray(pcbddc->vec1_R);
4006: }
4007: MatDenseRestoreArray(local_auxmat2_R,&array2);
4008: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4009: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4010: } else {
4011: if (lda_rhs != n_R) {
4012: IS dummy;
4014: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4015: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4016: ISDestroy(&dummy);
4017: } else {
4018: PetscObjectReference((PetscObject)local_auxmat2_R);
4019: pcbddc->local_auxmat2 = local_auxmat2_R;
4020: }
4021: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4022: }
4023: ISDestroy(&is_aux);
4024: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
4025: MatScale(M3,m_one);
4026: if (isCHOL) {
4027: MatCholeskyFactor(M3,NULL,NULL);
4028: } else {
4029: MatLUFactor(M3,NULL,NULL,NULL);
4030: }
4031: MatSeqDenseInvertFactors_Private(M3);
4032: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4033: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4034: MatDestroy(&C_B);
4035: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4036: MatDestroy(&M3);
4037: }
4039: /* Get submatrices from subdomain matrix */
4040: if (n_vertices) {
4041: IS is_aux;
4042: PetscBool isseqaij;
4044: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4045: IS tis;
4047: ISDuplicate(pcbddc->is_R_local,&tis);
4048: ISSort(tis);
4049: ISComplement(tis,0,pcis->n,&is_aux);
4050: ISDestroy(&tis);
4051: } else {
4052: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4053: }
4054: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4055: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4056: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4057: if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4058: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4059: }
4060: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4061: ISDestroy(&is_aux);
4062: }
4064: /* Matrix of coarse basis functions (local) */
4065: if (pcbddc->coarse_phi_B) {
4066: PetscInt on_B,on_primal,on_D=n_D;
4067: if (pcbddc->coarse_phi_D) {
4068: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4069: }
4070: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4071: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4072: PetscScalar *marray;
4074: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4075: PetscFree(marray);
4076: MatDestroy(&pcbddc->coarse_phi_B);
4077: MatDestroy(&pcbddc->coarse_psi_B);
4078: MatDestroy(&pcbddc->coarse_phi_D);
4079: MatDestroy(&pcbddc->coarse_psi_D);
4080: }
4081: }
4083: if (!pcbddc->coarse_phi_B) {
4084: PetscScalar *marr;
4086: /* memory size */
4087: n = n_B*pcbddc->local_primal_size;
4088: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4089: if (!pcbddc->symmetric_primal) n *= 2;
4090: PetscCalloc1(n,&marr);
4091: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4092: marr += n_B*pcbddc->local_primal_size;
4093: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4094: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4095: marr += n_D*pcbddc->local_primal_size;
4096: }
4097: if (!pcbddc->symmetric_primal) {
4098: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4099: marr += n_B*pcbddc->local_primal_size;
4100: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4101: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4102: }
4103: } else {
4104: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4105: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4106: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4107: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4108: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4109: }
4110: }
4111: }
4113: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4114: p0_lidx_I = NULL;
4115: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4116: const PetscInt *idxs;
4118: ISGetIndices(pcis->is_I_local,&idxs);
4119: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4120: for (i=0;i<pcbddc->benign_n;i++) {
4121: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4122: }
4123: ISRestoreIndices(pcis->is_I_local,&idxs);
4124: }
4126: /* vertices */
4127: if (n_vertices) {
4128: PetscBool restoreavr = PETSC_FALSE;
4130: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4132: if (n_R) {
4133: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4134: PetscBLASInt B_N,B_one = 1;
4135: PetscScalar *x,*y;
4137: MatScale(A_RV,m_one);
4138: if (need_benign_correction) {
4139: ISLocalToGlobalMapping RtoN;
4140: IS is_p0;
4141: PetscInt *idxs_p0,n;
4143: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4144: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4145: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4146: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n);
4147: ISLocalToGlobalMappingDestroy(&RtoN);
4148: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4149: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4150: ISDestroy(&is_p0);
4151: }
4153: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4154: if (!sparserhs || need_benign_correction) {
4155: if (lda_rhs == n_R) {
4156: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4157: } else {
4158: PetscScalar *av,*array;
4159: const PetscInt *xadj,*adjncy;
4160: PetscInt n;
4161: PetscBool flg_row;
4163: array = work+lda_rhs*n_vertices;
4164: PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4165: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4166: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4167: MatSeqAIJGetArray(A_RV,&av);
4168: for (i=0;i<n;i++) {
4169: PetscInt j;
4170: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4171: }
4172: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4173: MatDestroy(&A_RV);
4174: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4175: }
4176: if (need_benign_correction) {
4177: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4178: PetscScalar *marr;
4180: MatDenseGetArray(A_RV,&marr);
4181: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4183: | 0 0 0 | (V)
4184: L = | 0 0 -1 | (P-p0)
4185: | 0 0 -1 | (p0)
4187: */
4188: for (i=0;i<reuse_solver->benign_n;i++) {
4189: const PetscScalar *vals;
4190: const PetscInt *idxs,*idxs_zero;
4191: PetscInt n,j,nz;
4193: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4194: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4195: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4196: for (j=0;j<n;j++) {
4197: PetscScalar val = vals[j];
4198: PetscInt k,col = idxs[j];
4199: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4200: }
4201: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4202: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4203: }
4204: MatDenseRestoreArray(A_RV,&marr);
4205: }
4206: PetscObjectReference((PetscObject)A_RV);
4207: Brhs = A_RV;
4208: } else {
4209: Mat tA_RVT,A_RVT;
4211: if (!pcbddc->symmetric_primal) {
4212: /* A_RV already scaled by -1 */
4213: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4214: } else {
4215: restoreavr = PETSC_TRUE;
4216: MatScale(A_VR,-1.0);
4217: PetscObjectReference((PetscObject)A_VR);
4218: A_RVT = A_VR;
4219: }
4220: if (lda_rhs != n_R) {
4221: PetscScalar *aa;
4222: PetscInt r,*ii,*jj;
4223: PetscBool done;
4225: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4226: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4227: MatSeqAIJGetArray(A_RVT,&aa);
4228: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4229: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4230: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4231: } else {
4232: PetscObjectReference((PetscObject)A_RVT);
4233: tA_RVT = A_RVT;
4234: }
4235: MatCreateTranspose(tA_RVT,&Brhs);
4236: MatDestroy(&tA_RVT);
4237: MatDestroy(&A_RVT);
4238: }
4239: if (F) {
4240: /* need to correct the rhs */
4241: if (need_benign_correction) {
4242: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4243: PetscScalar *marr;
4245: MatDenseGetArray(Brhs,&marr);
4246: if (lda_rhs != n_R) {
4247: for (i=0;i<n_vertices;i++) {
4248: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4249: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4250: VecResetArray(dummy_vec);
4251: }
4252: } else {
4253: for (i=0;i<n_vertices;i++) {
4254: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4255: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4256: VecResetArray(pcbddc->vec1_R);
4257: }
4258: }
4259: MatDenseRestoreArray(Brhs,&marr);
4260: }
4261: MatMatSolve(F,Brhs,A_RRmA_RV);
4262: if (restoreavr) {
4263: MatScale(A_VR,-1.0);
4264: }
4265: /* need to correct the solution */
4266: if (need_benign_correction) {
4267: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4268: PetscScalar *marr;
4270: MatDenseGetArray(A_RRmA_RV,&marr);
4271: if (lda_rhs != n_R) {
4272: for (i=0;i<n_vertices;i++) {
4273: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4274: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4275: VecResetArray(dummy_vec);
4276: }
4277: } else {
4278: for (i=0;i<n_vertices;i++) {
4279: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4280: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4281: VecResetArray(pcbddc->vec1_R);
4282: }
4283: }
4284: MatDenseRestoreArray(A_RRmA_RV,&marr);
4285: }
4286: } else {
4287: MatDenseGetArray(Brhs,&y);
4288: for (i=0;i<n_vertices;i++) {
4289: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4290: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4291: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4292: VecResetArray(pcbddc->vec1_R);
4293: VecResetArray(pcbddc->vec2_R);
4294: }
4295: MatDenseRestoreArray(Brhs,&y);
4296: }
4297: MatDestroy(&A_RV);
4298: MatDestroy(&Brhs);
4299: /* S_VV and S_CV */
4300: if (n_constraints) {
4301: Mat B;
4303: PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4304: for (i=0;i<n_vertices;i++) {
4305: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4306: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4307: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4308: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4309: VecResetArray(pcis->vec1_B);
4310: VecResetArray(pcbddc->vec1_R);
4311: }
4312: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4313: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4314: MatDestroy(&B);
4315: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4316: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4317: MatScale(S_CV,m_one);
4318: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4319: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4320: MatDestroy(&B);
4321: }
4322: if (lda_rhs != n_R) {
4323: MatDestroy(&A_RRmA_RV);
4324: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4325: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4326: }
4327: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4328: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4329: if (need_benign_correction) {
4330: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4331: PetscScalar *marr,*sums;
4333: PetscMalloc1(n_vertices,&sums);
4334: MatDenseGetArray(S_VVt,&marr);
4335: for (i=0;i<reuse_solver->benign_n;i++) {
4336: const PetscScalar *vals;
4337: const PetscInt *idxs,*idxs_zero;
4338: PetscInt n,j,nz;
4340: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4341: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4342: for (j=0;j<n_vertices;j++) {
4343: PetscInt k;
4344: sums[j] = 0.;
4345: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4346: }
4347: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4348: for (j=0;j<n;j++) {
4349: PetscScalar val = vals[j];
4350: PetscInt k;
4351: for (k=0;k<n_vertices;k++) {
4352: marr[idxs[j]+k*n_vertices] += val*sums[k];
4353: }
4354: }
4355: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4356: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4357: }
4358: PetscFree(sums);
4359: MatDenseRestoreArray(S_VVt,&marr);
4360: MatDestroy(&A_RV_bcorr);
4361: }
4362: MatDestroy(&A_RRmA_RV);
4363: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4364: MatDenseGetArray(A_VV,&x);
4365: MatDenseGetArray(S_VVt,&y);
4366: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4367: MatDenseRestoreArray(A_VV,&x);
4368: MatDenseRestoreArray(S_VVt,&y);
4369: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4370: MatDestroy(&S_VVt);
4371: } else {
4372: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4373: }
4374: MatDestroy(&A_VV);
4376: /* coarse basis functions */
4377: for (i=0;i<n_vertices;i++) {
4378: PetscScalar *y;
4380: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4381: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4382: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4383: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4384: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4385: y[n_B*i+idx_V_B[i]] = 1.0;
4386: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4387: VecResetArray(pcis->vec1_B);
4389: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4390: PetscInt j;
4392: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4393: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4394: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4395: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4396: VecResetArray(pcis->vec1_D);
4397: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4398: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4399: }
4400: VecResetArray(pcbddc->vec1_R);
4401: }
4402: /* if n_R == 0 the object is not destroyed */
4403: MatDestroy(&A_RV);
4404: }
4405: VecDestroy(&dummy_vec);
4407: if (n_constraints) {
4408: Mat B;
4410: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4411: MatScale(S_CC,m_one);
4412: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4413: MatScale(S_CC,m_one);
4414: if (n_vertices) {
4415: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4416: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4417: } else {
4418: Mat S_VCt;
4420: if (lda_rhs != n_R) {
4421: MatDestroy(&B);
4422: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4423: MatSeqDenseSetLDA(B,lda_rhs);
4424: }
4425: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4426: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4427: MatDestroy(&S_VCt);
4428: }
4429: }
4430: MatDestroy(&B);
4431: /* coarse basis functions */
4432: for (i=0;i<n_constraints;i++) {
4433: PetscScalar *y;
4435: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4436: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4437: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4438: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4439: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4440: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4441: VecResetArray(pcis->vec1_B);
4442: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4443: PetscInt j;
4445: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4446: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4447: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4448: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4449: VecResetArray(pcis->vec1_D);
4450: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4451: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4452: }
4453: VecResetArray(pcbddc->vec1_R);
4454: }
4455: }
4456: if (n_constraints) {
4457: MatDestroy(&local_auxmat2_R);
4458: }
4459: PetscFree(p0_lidx_I);
4461: /* coarse matrix entries relative to B_0 */
4462: if (pcbddc->benign_n) {
4463: Mat B0_B,B0_BPHI;
4464: IS is_dummy;
4465: PetscScalar *data;
4466: PetscInt j;
4468: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4469: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4470: ISDestroy(&is_dummy);
4471: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4472: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4473: MatDenseGetArray(B0_BPHI,&data);
4474: for (j=0;j<pcbddc->benign_n;j++) {
4475: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4476: for (i=0;i<pcbddc->local_primal_size;i++) {
4477: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4478: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4479: }
4480: }
4481: MatDenseRestoreArray(B0_BPHI,&data);
4482: MatDestroy(&B0_B);
4483: MatDestroy(&B0_BPHI);
4484: }
4486: /* compute other basis functions for non-symmetric problems */
4487: if (!pcbddc->symmetric_primal) {
4488: Mat B_V=NULL,B_C=NULL;
4489: PetscScalar *marray;
4491: if (n_constraints) {
4492: Mat S_CCT,C_CRT;
4494: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4495: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4496: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4497: MatDestroy(&S_CCT);
4498: if (n_vertices) {
4499: Mat S_VCT;
4501: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4502: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4503: MatDestroy(&S_VCT);
4504: }
4505: MatDestroy(&C_CRT);
4506: } else {
4507: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4508: }
4509: if (n_vertices && n_R) {
4510: PetscScalar *av,*marray;
4511: const PetscInt *xadj,*adjncy;
4512: PetscInt n;
4513: PetscBool flg_row;
4515: /* B_V = B_V - A_VR^T */
4516: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4517: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4518: MatSeqAIJGetArray(A_VR,&av);
4519: MatDenseGetArray(B_V,&marray);
4520: for (i=0;i<n;i++) {
4521: PetscInt j;
4522: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4523: }
4524: MatDenseRestoreArray(B_V,&marray);
4525: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4526: MatDestroy(&A_VR);
4527: }
4529: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4530: if (n_vertices) {
4531: MatDenseGetArray(B_V,&marray);
4532: for (i=0;i<n_vertices;i++) {
4533: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4534: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4535: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4536: VecResetArray(pcbddc->vec1_R);
4537: VecResetArray(pcbddc->vec2_R);
4538: }
4539: MatDenseRestoreArray(B_V,&marray);
4540: }
4541: if (B_C) {
4542: MatDenseGetArray(B_C,&marray);
4543: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4544: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4545: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4546: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4547: VecResetArray(pcbddc->vec1_R);
4548: VecResetArray(pcbddc->vec2_R);
4549: }
4550: MatDenseRestoreArray(B_C,&marray);
4551: }
4552: /* coarse basis functions */
4553: for (i=0;i<pcbddc->local_primal_size;i++) {
4554: PetscScalar *y;
4556: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4557: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4558: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4559: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4560: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4561: if (i<n_vertices) {
4562: y[n_B*i+idx_V_B[i]] = 1.0;
4563: }
4564: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4565: VecResetArray(pcis->vec1_B);
4567: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4568: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4569: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4570: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4571: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4572: VecResetArray(pcis->vec1_D);
4573: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4574: }
4575: VecResetArray(pcbddc->vec1_R);
4576: }
4577: MatDestroy(&B_V);
4578: MatDestroy(&B_C);
4579: }
4581: /* free memory */
4582: PetscFree(idx_V_B);
4583: MatDestroy(&S_VV);
4584: MatDestroy(&S_CV);
4585: MatDestroy(&S_VC);
4586: MatDestroy(&S_CC);
4587: PetscFree(work);
4588: if (n_vertices) {
4589: MatDestroy(&A_VR);
4590: }
4591: if (n_constraints) {
4592: MatDestroy(&C_CR);
4593: }
4594: /* Checking coarse_sub_mat and coarse basis functions */
4595: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4596: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4597: if (pcbddc->dbg_flag) {
4598: Mat coarse_sub_mat;
4599: Mat AUXMAT,TM1,TM2,TM3,TM4;
4600: Mat coarse_phi_D,coarse_phi_B;
4601: Mat coarse_psi_D,coarse_psi_B;
4602: Mat A_II,A_BB,A_IB,A_BI;
4603: Mat C_B,CPHI;
4604: IS is_dummy;
4605: Vec mones;
4606: MatType checkmattype=MATSEQAIJ;
4607: PetscReal real_value;
4609: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4610: Mat A;
4611: PCBDDCBenignProject(pc,NULL,NULL,&A);
4612: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4613: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4614: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4615: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4616: MatDestroy(&A);
4617: } else {
4618: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4619: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4620: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4621: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4622: }
4623: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4624: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4625: if (!pcbddc->symmetric_primal) {
4626: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4627: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4628: }
4629: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4631: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4632: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4633: PetscViewerFlush(pcbddc->dbg_viewer);
4634: if (!pcbddc->symmetric_primal) {
4635: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4636: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4637: MatDestroy(&AUXMAT);
4638: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4639: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4640: MatDestroy(&AUXMAT);
4641: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4642: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4643: MatDestroy(&AUXMAT);
4644: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4645: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4646: MatDestroy(&AUXMAT);
4647: } else {
4648: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4649: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4650: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4651: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4652: MatDestroy(&AUXMAT);
4653: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4654: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4655: MatDestroy(&AUXMAT);
4656: }
4657: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4658: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4659: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4660: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4661: if (pcbddc->benign_n) {
4662: Mat B0_B,B0_BPHI;
4663: PetscScalar *data,*data2;
4664: PetscInt j;
4666: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4667: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4668: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4669: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4670: MatDenseGetArray(TM1,&data);
4671: MatDenseGetArray(B0_BPHI,&data2);
4672: for (j=0;j<pcbddc->benign_n;j++) {
4673: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4674: for (i=0;i<pcbddc->local_primal_size;i++) {
4675: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4676: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4677: }
4678: }
4679: MatDenseRestoreArray(TM1,&data);
4680: MatDenseRestoreArray(B0_BPHI,&data2);
4681: MatDestroy(&B0_B);
4682: ISDestroy(&is_dummy);
4683: MatDestroy(&B0_BPHI);
4684: }
4685: #if 0
4686: {
4687: PetscViewer viewer;
4688: char filename[256];
4689: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4690: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4691: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4692: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4693: MatView(coarse_sub_mat,viewer);
4694: PetscObjectSetName((PetscObject)TM1,"projected");
4695: MatView(TM1,viewer);
4696: if (pcbddc->coarse_phi_B) {
4697: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4698: MatView(pcbddc->coarse_phi_B,viewer);
4699: }
4700: if (pcbddc->coarse_phi_D) {
4701: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4702: MatView(pcbddc->coarse_phi_D,viewer);
4703: }
4704: if (pcbddc->coarse_psi_B) {
4705: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4706: MatView(pcbddc->coarse_psi_B,viewer);
4707: }
4708: if (pcbddc->coarse_psi_D) {
4709: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4710: MatView(pcbddc->coarse_psi_D,viewer);
4711: }
4712: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4713: MatView(pcbddc->local_mat,viewer);
4714: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4715: MatView(pcbddc->ConstraintMatrix,viewer);
4716: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4717: ISView(pcis->is_I_local,viewer);
4718: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4719: ISView(pcis->is_B_local,viewer);
4720: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4721: ISView(pcbddc->is_R_local,viewer);
4722: PetscViewerDestroy(&viewer);
4723: }
4724: #endif
4725: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4726: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4727: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4728: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4730: /* check constraints */
4731: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4732: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4733: if (!pcbddc->benign_n) { /* TODO: add benign case */
4734: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4735: } else {
4736: PetscScalar *data;
4737: Mat tmat;
4738: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4739: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4740: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4741: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4742: MatDestroy(&tmat);
4743: }
4744: MatCreateVecs(CPHI,&mones,NULL);
4745: VecSet(mones,-1.0);
4746: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4747: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4748: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4749: if (!pcbddc->symmetric_primal) {
4750: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4751: VecSet(mones,-1.0);
4752: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4753: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4754: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4755: }
4756: MatDestroy(&C_B);
4757: MatDestroy(&CPHI);
4758: ISDestroy(&is_dummy);
4759: VecDestroy(&mones);
4760: PetscViewerFlush(pcbddc->dbg_viewer);
4761: MatDestroy(&A_II);
4762: MatDestroy(&A_BB);
4763: MatDestroy(&A_IB);
4764: MatDestroy(&A_BI);
4765: MatDestroy(&TM1);
4766: MatDestroy(&TM2);
4767: MatDestroy(&TM3);
4768: MatDestroy(&TM4);
4769: MatDestroy(&coarse_phi_D);
4770: MatDestroy(&coarse_phi_B);
4771: if (!pcbddc->symmetric_primal) {
4772: MatDestroy(&coarse_psi_D);
4773: MatDestroy(&coarse_psi_B);
4774: }
4775: MatDestroy(&coarse_sub_mat);
4776: }
4777: /* get back data */
4778: *coarse_submat_vals_n = coarse_submat_vals;
4779: return(0);
4780: }
4782: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4783: {
4784: Mat *work_mat;
4785: IS isrow_s,iscol_s;
4786: PetscBool rsorted,csorted;
4787: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4791: ISSorted(isrow,&rsorted);
4792: ISSorted(iscol,&csorted);
4793: ISGetLocalSize(isrow,&rsize);
4794: ISGetLocalSize(iscol,&csize);
4796: if (!rsorted) {
4797: const PetscInt *idxs;
4798: PetscInt *idxs_sorted,i;
4800: PetscMalloc1(rsize,&idxs_perm_r);
4801: PetscMalloc1(rsize,&idxs_sorted);
4802: for (i=0;i<rsize;i++) {
4803: idxs_perm_r[i] = i;
4804: }
4805: ISGetIndices(isrow,&idxs);
4806: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4807: for (i=0;i<rsize;i++) {
4808: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4809: }
4810: ISRestoreIndices(isrow,&idxs);
4811: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4812: } else {
4813: PetscObjectReference((PetscObject)isrow);
4814: isrow_s = isrow;
4815: }
4817: if (!csorted) {
4818: if (isrow == iscol) {
4819: PetscObjectReference((PetscObject)isrow_s);
4820: iscol_s = isrow_s;
4821: } else {
4822: const PetscInt *idxs;
4823: PetscInt *idxs_sorted,i;
4825: PetscMalloc1(csize,&idxs_perm_c);
4826: PetscMalloc1(csize,&idxs_sorted);
4827: for (i=0;i<csize;i++) {
4828: idxs_perm_c[i] = i;
4829: }
4830: ISGetIndices(iscol,&idxs);
4831: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4832: for (i=0;i<csize;i++) {
4833: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4834: }
4835: ISRestoreIndices(iscol,&idxs);
4836: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4837: }
4838: } else {
4839: PetscObjectReference((PetscObject)iscol);
4840: iscol_s = iscol;
4841: }
4843: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
4845: if (!rsorted || !csorted) {
4846: Mat new_mat;
4847: IS is_perm_r,is_perm_c;
4849: if (!rsorted) {
4850: PetscInt *idxs_r,i;
4851: PetscMalloc1(rsize,&idxs_r);
4852: for (i=0;i<rsize;i++) {
4853: idxs_r[idxs_perm_r[i]] = i;
4854: }
4855: PetscFree(idxs_perm_r);
4856: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4857: } else {
4858: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4859: }
4860: ISSetPermutation(is_perm_r);
4862: if (!csorted) {
4863: if (isrow_s == iscol_s) {
4864: PetscObjectReference((PetscObject)is_perm_r);
4865: is_perm_c = is_perm_r;
4866: } else {
4867: PetscInt *idxs_c,i;
4868: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4869: PetscMalloc1(csize,&idxs_c);
4870: for (i=0;i<csize;i++) {
4871: idxs_c[idxs_perm_c[i]] = i;
4872: }
4873: PetscFree(idxs_perm_c);
4874: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4875: }
4876: } else {
4877: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4878: }
4879: ISSetPermutation(is_perm_c);
4881: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4882: MatDestroy(&work_mat[0]);
4883: work_mat[0] = new_mat;
4884: ISDestroy(&is_perm_r);
4885: ISDestroy(&is_perm_c);
4886: }
4888: PetscObjectReference((PetscObject)work_mat[0]);
4889: *B = work_mat[0];
4890: MatDestroyMatrices(1,&work_mat);
4891: ISDestroy(&isrow_s);
4892: ISDestroy(&iscol_s);
4893: return(0);
4894: }
4896: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4897: {
4898: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4899: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4900: Mat new_mat,lA;
4901: IS is_local,is_global;
4902: PetscInt local_size;
4903: PetscBool isseqaij;
4907: MatDestroy(&pcbddc->local_mat);
4908: MatGetSize(matis->A,&local_size,NULL);
4909: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4910: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4911: ISDestroy(&is_local);
4912: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4913: ISDestroy(&is_global);
4915: /* check */
4916: if (pcbddc->dbg_flag) {
4917: Vec x,x_change;
4918: PetscReal error;
4920: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4921: VecSetRandom(x,NULL);
4922: MatMult(ChangeOfBasisMatrix,x,x_change);
4923: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4924: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4925: MatMult(new_mat,matis->x,matis->y);
4926: if (!pcbddc->change_interior) {
4927: const PetscScalar *x,*y,*v;
4928: PetscReal lerror = 0.;
4929: PetscInt i;
4931: VecGetArrayRead(matis->x,&x);
4932: VecGetArrayRead(matis->y,&y);
4933: VecGetArrayRead(matis->counter,&v);
4934: for (i=0;i<local_size;i++)
4935: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4936: lerror = PetscAbsScalar(x[i]-y[i]);
4937: VecRestoreArrayRead(matis->x,&x);
4938: VecRestoreArrayRead(matis->y,&y);
4939: VecRestoreArrayRead(matis->counter,&v);
4940: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4941: if (error > PETSC_SMALL) {
4942: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4943: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error);
4944: } else {
4945: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error);
4946: }
4947: }
4948: }
4949: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4950: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4951: VecAXPY(x,-1.0,x_change);
4952: VecNorm(x,NORM_INFINITY,&error);
4953: if (error > PETSC_SMALL) {
4954: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4955: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
4956: } else {
4957: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error);
4958: }
4959: }
4960: VecDestroy(&x);
4961: VecDestroy(&x_change);
4962: }
4964: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4965: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
4967: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4968: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4969: if (isseqaij) {
4970: MatDestroy(&pcbddc->local_mat);
4971: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4972: if (lA) {
4973: Mat work;
4974: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4975: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4976: MatDestroy(&work);
4977: }
4978: } else {
4979: Mat work_mat;
4981: MatDestroy(&pcbddc->local_mat);
4982: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4983: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4984: MatDestroy(&work_mat);
4985: if (lA) {
4986: Mat work;
4987: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4988: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4989: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4990: MatDestroy(&work);
4991: }
4992: }
4993: if (matis->A->symmetric_set) {
4994: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4995: #if !defined(PETSC_USE_COMPLEX)
4996: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4997: #endif
4998: }
4999: MatDestroy(&new_mat);
5000: return(0);
5001: }
5003: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5004: {
5005: PC_IS* pcis = (PC_IS*)(pc->data);
5006: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5007: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5008: PetscInt *idx_R_local=NULL;
5009: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5010: PetscInt vbs,bs;
5011: PetscBT bitmask=NULL;
5012: PetscErrorCode ierr;
5015: /*
5016: No need to setup local scatters if
5017: - primal space is unchanged
5018: AND
5019: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5020: AND
5021: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5022: */
5023: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5024: return(0);
5025: }
5026: /* destroy old objects */
5027: ISDestroy(&pcbddc->is_R_local);
5028: VecScatterDestroy(&pcbddc->R_to_B);
5029: VecScatterDestroy(&pcbddc->R_to_D);
5030: /* Set Non-overlapping dimensions */
5031: n_B = pcis->n_B;
5032: n_D = pcis->n - n_B;
5033: n_vertices = pcbddc->n_vertices;
5035: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5037: /* create auxiliary bitmask and allocate workspace */
5038: if (!sub_schurs || !sub_schurs->reuse_solver) {
5039: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5040: PetscBTCreate(pcis->n,&bitmask);
5041: for (i=0;i<n_vertices;i++) {
5042: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5043: }
5045: for (i=0, n_R=0; i<pcis->n; i++) {
5046: if (!PetscBTLookup(bitmask,i)) {
5047: idx_R_local[n_R++] = i;
5048: }
5049: }
5050: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5051: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5053: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5054: ISGetLocalSize(reuse_solver->is_R,&n_R);
5055: }
5057: /* Block code */
5058: vbs = 1;
5059: MatGetBlockSize(pcbddc->local_mat,&bs);
5060: if (bs>1 && !(n_vertices%bs)) {
5061: PetscBool is_blocked = PETSC_TRUE;
5062: PetscInt *vary;
5063: if (!sub_schurs || !sub_schurs->reuse_solver) {
5064: PetscMalloc1(pcis->n/bs,&vary);
5065: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5066: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5067: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5068: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5069: for (i=0; i<pcis->n/bs; i++) {
5070: if (vary[i]!=0 && vary[i]!=bs) {
5071: is_blocked = PETSC_FALSE;
5072: break;
5073: }
5074: }
5075: PetscFree(vary);
5076: } else {
5077: /* Verify directly the R set */
5078: for (i=0; i<n_R/bs; i++) {
5079: PetscInt j,node=idx_R_local[bs*i];
5080: for (j=1; j<bs; j++) {
5081: if (node != idx_R_local[bs*i+j]-j) {
5082: is_blocked = PETSC_FALSE;
5083: break;
5084: }
5085: }
5086: }
5087: }
5088: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5089: vbs = bs;
5090: for (i=0;i<n_R/vbs;i++) {
5091: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5092: }
5093: }
5094: }
5095: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5096: if (sub_schurs && sub_schurs->reuse_solver) {
5097: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5099: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5100: ISDestroy(&reuse_solver->is_R);
5101: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5102: reuse_solver->is_R = pcbddc->is_R_local;
5103: } else {
5104: PetscFree(idx_R_local);
5105: }
5107: /* print some info if requested */
5108: if (pcbddc->dbg_flag) {
5109: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5110: PetscViewerFlush(pcbddc->dbg_viewer);
5111: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5112: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5113: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
5114: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5115: PetscViewerFlush(pcbddc->dbg_viewer);
5116: }
5118: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5119: if (!sub_schurs || !sub_schurs->reuse_solver) {
5120: IS is_aux1,is_aux2;
5121: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5123: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5124: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5125: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5126: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5127: for (i=0; i<n_D; i++) {
5128: PetscBTSet(bitmask,is_indices[i]);
5129: }
5130: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5131: for (i=0, j=0; i<n_R; i++) {
5132: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5133: aux_array1[j++] = i;
5134: }
5135: }
5136: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5137: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5138: for (i=0, j=0; i<n_B; i++) {
5139: if (!PetscBTLookup(bitmask,is_indices[i])) {
5140: aux_array2[j++] = i;
5141: }
5142: }
5143: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5144: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5145: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5146: ISDestroy(&is_aux1);
5147: ISDestroy(&is_aux2);
5149: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5150: PetscMalloc1(n_D,&aux_array1);
5151: for (i=0, j=0; i<n_R; i++) {
5152: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5153: aux_array1[j++] = i;
5154: }
5155: }
5156: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5157: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5158: ISDestroy(&is_aux1);
5159: }
5160: PetscBTDestroy(&bitmask);
5161: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5162: } else {
5163: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5164: IS tis;
5165: PetscInt schur_size;
5167: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5168: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5169: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5170: ISDestroy(&tis);
5171: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5172: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5173: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5174: ISDestroy(&tis);
5175: }
5176: }
5177: return(0);
5178: }
/*
   PCBDDCSetUpLocalSolvers - Creates or updates the two subdomain-local KSP solvers kept in the BDDC context.

   Input Parameters:
+  pc        - the preconditioner; pc->data is accessed both as PC_BDDC and as PC_IS
.  dirichlet - if true, set up pcbddc->ksp_D with operator pcis->A_II and share it as pcis->ksp_D
-  neumann   - if true, set up pcbddc->ksp_R with operator A_RR (the is_R_local x is_R_local block of
               pcbddc->local_mat, or the operator taken from the reused correction solver)

   Notes:
   Both solvers default to KSPPREONLY with PCCHOLESKY (SEQSBAIJ operator) or PCLU (otherwise) and can be
   customized from the options database through the prefixes computed at the top of the routine.
   When pcbddc->dbg_flag is set, each requested solver is applied to A*x for a random x and the infinity
   norm of x - solve(A*x) is printed on pcbddc->dbg_viewer.
   In this listing the return values of the called routines are not checked.
*/
5181: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5182: {
5183: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5184: PC_IS *pcis = (PC_IS*)pc->data;
5185: PC pc_temp;
5186: Mat A_RR;
5187: MatReuse reuse;
5188: PetscScalar m_one = -1.0;
5189: PetscReal value;
5190: PetscInt n_D,n_R;
5191: PetscBool check_corr,issbaij;
5193: /* prefixes stuff */
5194: char dir_prefix[256],neu_prefix[256],str_level[16];
5195: size_t len;
5199: /* compute prefixes */
5200: PetscStrcpy(dir_prefix,"");
5201: PetscStrcpy(neu_prefix,"");
/* level 0: solver prefix = pc prefix followed by "pc_bddc_dirichlet_" or "pc_bddc_neumann_" */
5202: if (!pcbddc->current_level) {
5203: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5204: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5205: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5206: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5207: } else {
/* coarser levels: copy only the leading part of the pc prefix (its tail "pc_bddc_coarse_" and, from level 2 on,
   the previous level tag are dropped by shortening len), then append the solver name and "l<current_level>_" */
5208: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5209: PetscStrlen(((PetscObject)pc)->prefix,&len);
/* NOTE(review): len is an unsigned size_t; the subtractions below assume the pc prefix really ends with the
   removed suffix -- confirm against the code that builds the coarse solver prefix */
5210: len -= 15; /* remove "pc_bddc_coarse_" */
5211: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5212: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5213: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5214: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5215: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5216: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5217: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5218: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5219: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5220: }
5222: /* DIRICHLET PROBLEM */
5223: if (dirichlet) {
5224: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* benign_n set without an explicit change of basis is only handled when the sub_schurs solver is reused */
5225: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5226: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n");
/* debug mode: replace pcis->A_II with the matrix produced by PCBDDCBenignProject on the interior dofs
   (presumably so that the solver check at the end of this routine uses the projected operator -- confirm) */
5227: if (pcbddc->dbg_flag) {
5228: Mat A_IIn;
5230: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5231: MatDestroy(&pcis->A_II);
5232: pcis->A_II = A_IIn;
5233: }
5234: }
/* when local_mat->symmetric_set is nonzero, it is passed as the MAT_SYMMETRIC option value for A_II */
5235: if (pcbddc->local_mat->symmetric_set) {
5236: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5237: }
5238: /* Matrix for Dirichlet problem is pcis->A_II */
5239: n_D = pcis->n - pcis->n_B;
5240: if (!pcbddc->ksp_D) { /* create object if not yet build */
5241: void (*f)(void) = 0;
5243: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5244: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5245: /* default */
5246: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5247: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5248: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
5249: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5250: if (issbaij) {
5251: PCSetType(pc_temp,PCCHOLESKY);
5252: } else {
5253: PCSetType(pc_temp,PCLU);
5254: }
5255: /* Allow user's customization */
5256: KSPSetFromOptions(pcbddc->ksp_D);
/* if the selected PC composes "PCSetCoordinates_C" and mat_graph->cloc is nonzero, hand it the
   coordinates of the dofs listed in is_I_local (cdim values per dof, gathered from mat_graph->coords) */
5257: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5258: if (f && pcbddc->mat_graph->cloc) {
5259: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5260: const PetscInt *idxs;
5261: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5263: ISGetLocalSize(pcis->is_I_local,&nl);
5264: ISGetIndices(pcis->is_I_local,&idxs);
5265: PetscMalloc1(nl*cdim,&scoords);
5266: for (i=0;i<nl;i++) {
5267: for (d=0;d<cdim;d++) {
5268: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5269: }
5270: }
5271: ISRestoreIndices(pcis->is_I_local,&idxs);
5272: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5273: PetscFree(scoords);
5274: }
5275: }
5276: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
/* when the sub_schurs solvers are reused, the PC of ksp_D is replaced by the reused interior solver */
5277: if (sub_schurs && sub_schurs->reuse_solver) {
5278: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5280: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5281: }
5282: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5283: if (!n_D) {
5284: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5285: PCSetType(pc_temp,PCNONE);
5286: }
5287: /* set ksp_D into pcis data */
/* the previous pcis->ksp_D is released and a new reference to pcbddc->ksp_D is taken for the shared pointer */
5288: KSPDestroy(&pcis->ksp_D);
5289: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5290: pcis->ksp_D = pcbddc->ksp_D;
5291: }
5293: /* NEUMANN PROBLEM */
5294: A_RR = 0;
5295: if (neumann) {
5296: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5297: PetscInt ibs,mbs;
/* note: this issbaij shadows the function-scope variable of the same name */
5298: PetscBool issbaij, reuse_neumann_solver;
5299: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
/* the reused correction solver is used only if sub_schurs->reuse_solver exists and no object named
   "__KSPFETIDP_iP" is composed with sub_schurs->A */
5301: reuse_neumann_solver = PETSC_FALSE;
5302: if (sub_schurs && sub_schurs->reuse_solver) {
5303: IS iP;
5305: reuse_neumann_solver = PETSC_TRUE;
5306: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5307: if (iP) reuse_neumann_solver = PETSC_FALSE;
5308: }
5309: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5310: ISGetSize(pcbddc->is_R_local,&n_R);
/* decide whether the operator of an existing ksp_R can be reused (MAT_REUSE_MATRIX) or must be rebuilt;
   a local reference on A_RR is taken here and released at the end of the routine */
5311: if (pcbddc->ksp_R) { /* already created ksp */
5312: PetscInt nn_R;
5313: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5314: PetscObjectReference((PetscObject)A_RR);
5315: MatGetSize(A_RR,&nn_R,NULL);
5316: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5317: KSPReset(pcbddc->ksp_R);
5318: MatDestroy(&A_RR);
5319: reuse = MAT_INITIAL_MATRIX;
5320: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5321: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5322: MatDestroy(&A_RR);
5323: reuse = MAT_INITIAL_MATRIX;
5324: } else { /* safe to reuse the matrix */
5325: reuse = MAT_REUSE_MATRIX;
5326: }
5327: }
5328: /* last check */
5329: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5330: MatDestroy(&A_RR);
5331: reuse = MAT_INITIAL_MATRIX;
5332: }
5333: } else { /* first time, so we need to create the matrix */
5334: reuse = MAT_INITIAL_MATRIX;
5335: }
5336: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5337: MatGetBlockSize(pcbddc->local_mat,&mbs);
5338: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5339: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
/* if local_mat is the very same object as matis->A, a converted copy is created instead of converting in place,
   so matis->A itself is left untouched */
5340: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5341: if (matis->A == pcbddc->local_mat) {
5342: MatDestroy(&pcbddc->local_mat);
5343: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5344: } else {
5345: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5346: }
5347: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5348: if (matis->A == pcbddc->local_mat) {
5349: MatDestroy(&pcbddc->local_mat);
5350: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5351: } else {
5352: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5353: }
5354: }
5355: /* extract A_RR */
5356: if (reuse_neumann_solver) {
5357: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5359: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5360: MatDestroy(&A_RR);
5361: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5362: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5363: } else {
5364: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5365: }
5366: } else {
/* no debug check requested: take the operator already attached to the reused correction solver */
5367: MatDestroy(&A_RR);
5368: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5369: PetscObjectReference((PetscObject)A_RR);
5370: }
5371: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5372: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5373: }
5374: if (pcbddc->local_mat->symmetric_set) {
5375: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5376: }
5377: if (!pcbddc->ksp_R) { /* create object if not present */
5378: void (*f)(void) = 0;
5380: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5381: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5382: /* default */
5383: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5384: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5385: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5386: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5387: if (issbaij) {
5388: PCSetType(pc_temp,PCCHOLESKY);
5389: } else {
5390: PCSetType(pc_temp,PCLU);
5391: }
5392: /* Allow user's customization */
5393: KSPSetFromOptions(pcbddc->ksp_R);
/* same coordinate hand-off as for the Dirichlet solver, here for the dofs listed in is_R_local */
5394: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5395: if (f && pcbddc->mat_graph->cloc) {
5396: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5397: const PetscInt *idxs;
5398: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5400: ISGetLocalSize(pcbddc->is_R_local,&nl);
5401: ISGetIndices(pcbddc->is_R_local,&idxs);
5402: PetscMalloc1(nl*cdim,&scoords);
5403: for (i=0;i<nl;i++) {
5404: for (d=0;d<cdim;d++) {
5405: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5406: }
5407: }
5408: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5409: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5410: PetscFree(scoords);
5411: }
5412: }
5413: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5414: if (!n_R) {
5415: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5416: PCSetType(pc_temp,PCNONE);
5417: }
5418: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5419: /* Reuse solver if it is present */
5420: if (reuse_neumann_solver) {
5421: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5423: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5424: }
5425: }
/* NOTE(review): the synchronized mode pushed on dbg_viewer below is not popped inside this routine */
5427: if (pcbddc->dbg_flag) {
5428: PetscViewerFlush(pcbddc->dbg_viewer);
5429: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5430: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5431: }
5433: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
/* NullSpace_corr[0] and [2] switch the correction on for the Dirichlet and Neumann solver respectively;
   [1] and [3] are forwarded as the last argument of PCBDDCNullSpaceAssembleCorrection */
5434: check_corr = PETSC_FALSE;
5435: if (pcbddc->NullSpace_corr[0]) {
5436: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5437: }
5438: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5439: check_corr = PETSC_TRUE;
5440: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5441: }
5442: if (neumann && pcbddc->NullSpace_corr[2]) {
5443: check_corr = PETSC_TRUE;
5444: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5445: }
5446: /* check Dirichlet and Neumann solvers */
/* for a random x: y = A*x, y <- solve(y) in place, x <- x - y, then the infinity norm of x is printed */
5447: if (pcbddc->dbg_flag) {
5448: if (dirichlet) { /* Dirichlet */
5449: VecSetRandom(pcis->vec1_D,NULL);
5450: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5451: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5452: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5453: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5454: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5455: if (check_corr) {
5456: PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5457: }
5458: PetscViewerFlush(pcbddc->dbg_viewer);
5459: }
5460: if (neumann) { /* Neumann */
5461: VecSetRandom(pcbddc->vec1_R,NULL);
5462: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5463: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5464: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5465: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5466: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5467: if (check_corr) {
5468: PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5469: }
5470: PetscViewerFlush(pcbddc->dbg_viewer);
5471: }
5472: }
5473: /* free Neumann problem's matrix */
/* A_RR is 0 here when neumann is false, so this call is then made on a null handle */
5474: MatDestroy(&A_RR);
5475: return(0);
5476: }
5478: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5479: {
5480: PetscErrorCode ierr;
5481: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5482: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5483: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
5486: if (!reuse_solver) {
5487: VecSet(pcbddc->vec1_R,0.);
5488: }
5489: if (!pcbddc->switch_static) {
5490: if (applytranspose && pcbddc->local_auxmat1) {
5491: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5492: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5493: }
5494: if (!reuse_solver) {
5495: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5496: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5497: } else {
5498: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5500: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5501: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5502: }
5503: } else {
5504: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5505: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5506: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5507: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5508: if (applytranspose && pcbddc->local_auxmat1) {
5509: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5510: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5511: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5512: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5513: }
5514: }
5515: if (!reuse_solver || pcbddc->switch_static) {
5516: if (applytranspose) {
5517: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5518: } else {
5519: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5520: }
5521: } else {
5522: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5524: if (applytranspose) {
5525: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5526: } else {
5527: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5528: }
5529: }
5530: VecSet(inout_B,0.);
5531: if (!pcbddc->switch_static) {
5532: if (!reuse_solver) {
5533: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5534: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5535: } else {
5536: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5538: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5539: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5540: }
5541: if (!applytranspose && pcbddc->local_auxmat1) {
5542: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5543: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5544: }
5545: } else {
5546: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5547: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5548: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5549: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5550: if (!applytranspose && pcbddc->local_auxmat1) {
5551: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5552: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5553: }
5554: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5555: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5556: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5557: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5558: }
5559: return(0);
5560: }
5562: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5563: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5564: {
5566: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5567: PC_IS* pcis = (PC_IS*) (pc->data);
5568: const PetscScalar zero = 0.0;
5571: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5572: if (!pcbddc->benign_apply_coarse_only) {
5573: if (applytranspose) {
5574: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5575: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5576: } else {
5577: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5578: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5579: }
5580: } else {
5581: VecSet(pcbddc->vec1_P,zero);
5582: }
5584: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5585: if (pcbddc->benign_n) {
5586: PetscScalar *array;
5587: PetscInt j;
5589: VecGetArray(pcbddc->vec1_P,&array);
5590: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5591: VecRestoreArray(pcbddc->vec1_P,&array);
5592: }
5594: /* start communications from local primal nodes to rhs of coarse solver */
5595: VecSet(pcbddc->coarse_vec,zero);
5596: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5597: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5599: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5600: if (pcbddc->coarse_ksp) {
5601: Mat coarse_mat;
5602: Vec rhs,sol;
5603: MatNullSpace nullsp;
5604: PetscBool isbddc = PETSC_FALSE;
5606: if (pcbddc->benign_have_null) {
5607: PC coarse_pc;
5609: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5610: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5611: /* we need to propagate to coarser levels the need for a possible benign correction */
5612: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5613: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5614: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5615: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5616: }
5617: }
5618: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5619: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5620: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5621: MatGetNullSpace(coarse_mat,&nullsp);
5622: if (nullsp) {
5623: MatNullSpaceRemove(nullsp,rhs);
5624: }
5625: if (applytranspose) {
5626: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5627: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5628: } else {
5629: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5630: PC coarse_pc;
5632: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5633: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5634: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5635: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5636: } else {
5637: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5638: }
5639: }
5640: /* we don't need the benign correction at coarser levels anymore */
5641: if (pcbddc->benign_have_null && isbddc) {
5642: PC coarse_pc;
5643: PC_BDDC* coarsepcbddc;
5645: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5646: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5647: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5648: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5649: }
5650: if (nullsp) {
5651: MatNullSpaceRemove(nullsp,sol);
5652: }
5653: }
5655: /* Local solution on R nodes */
5656: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5657: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5658: }
5659: /* communications from coarse sol to local primal nodes */
5660: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5661: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5663: /* Sum contributions from the two levels */
5664: if (!pcbddc->benign_apply_coarse_only) {
5665: if (applytranspose) {
5666: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5667: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5668: } else {
5669: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5670: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5671: }
5672: /* store p0 */
5673: if (pcbddc->benign_n) {
5674: PetscScalar *array;
5675: PetscInt j;
5677: VecGetArray(pcbddc->vec1_P,&array);
5678: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5679: VecRestoreArray(pcbddc->vec1_P,&array);
5680: }
5681: } else { /* expand the coarse solution */
5682: if (applytranspose) {
5683: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5684: } else {
5685: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5686: }
5687: }
5688: return(0);
5689: }
5691: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5692: {
5694: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5695: PetscScalar *array;
5696: Vec from,to;
5699: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5700: from = pcbddc->coarse_vec;
5701: to = pcbddc->vec1_P;
5702: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5703: Vec tvec;
5705: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5706: VecResetArray(tvec);
5707: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5708: VecGetArray(tvec,&array);
5709: VecPlaceArray(from,array);
5710: VecRestoreArray(tvec,&array);
5711: }
5712: } else { /* from local to global -> put data in coarse right hand side */
5713: from = pcbddc->vec1_P;
5714: to = pcbddc->coarse_vec;
5715: }
5716: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5717: return(0);
5718: }
5720: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5721: {
5723: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5724: PetscScalar *array;
5725: Vec from,to;
5728: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5729: from = pcbddc->coarse_vec;
5730: to = pcbddc->vec1_P;
5731: } else { /* from local to global -> put data in coarse right hand side */
5732: from = pcbddc->vec1_P;
5733: to = pcbddc->coarse_vec;
5734: }
5735: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5736: if (smode == SCATTER_FORWARD) {
5737: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5738: Vec tvec;
5740: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5741: VecGetArray(to,&array);
5742: VecPlaceArray(tvec,array);
5743: VecRestoreArray(to,&array);
5744: }
5745: } else {
5746: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5747: VecResetArray(from);
5748: }
5749: }
5750: return(0);
5751: }
5753: /* uncomment for testing purposes */
5754: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5755: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5756: {
5757: PetscErrorCode ierr;
5758: PC_IS* pcis = (PC_IS*)(pc->data);
5759: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5760: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5761: /* one and zero */
5762: PetscScalar one=1.0,zero=0.0;
5763: /* space to store constraints and their local indices */
5764: PetscScalar *constraints_data;
5765: PetscInt *constraints_idxs,*constraints_idxs_B;
5766: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
5767: PetscInt *constraints_n;
5768: /* iterators */
5769: PetscInt i,j,k,total_counts,total_counts_cc,cum;
5770: /* BLAS integers */
5771: PetscBLASInt lwork,lierr;
5772: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
5773: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
5774: /* reuse */
5775: PetscInt olocal_primal_size,olocal_primal_size_cc;
5776: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
5777: /* change of basis */
5778: PetscBool qr_needed;
5779: PetscBT change_basis,qr_needed_idx;
5780: /* auxiliary stuff */
5781: PetscInt *nnz,*is_indices;
5782: PetscInt ncc;
5783: /* some quantities */
5784: PetscInt n_vertices,total_primal_vertices,valid_constraints;
5785: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5786: PetscReal tol; /* tolerance for retaining eigenmodes */
5789: tol = PetscSqrtReal(PETSC_SMALL);
5790: /* Destroy Mat objects computed previously */
5791: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5792: MatDestroy(&pcbddc->ConstraintMatrix);
5793: MatDestroy(&pcbddc->switch_static_change);
5794: /* save info on constraints from previous setup (if any) */
5795: olocal_primal_size = pcbddc->local_primal_size;
5796: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5797: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5798: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5799: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5800: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5801: PetscFree(pcbddc->primal_indices_local_idxs);
5803: if (!pcbddc->adaptive_selection) {
5804: IS ISForVertices,*ISForFaces,*ISForEdges;
5805: MatNullSpace nearnullsp;
5806: const Vec *nearnullvecs;
5807: Vec *localnearnullsp;
5808: PetscScalar *array;
5809: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
5810: PetscBool nnsp_has_cnst;
5811: /* LAPACK working arrays for SVD or POD */
5812: PetscBool skip_lapack,boolforchange;
5813: PetscScalar *work;
5814: PetscReal *singular_vals;
5815: #if defined(PETSC_USE_COMPLEX)
5816: PetscReal *rwork;
5817: #endif
5818: #if defined(PETSC_MISSING_LAPACK_GESVD)
5819: PetscScalar *temp_basis,*correlation_mat;
5820: #else
5821: PetscBLASInt dummy_int=1;
5822: PetscScalar dummy_scalar=1.;
5823: #endif
5825: /* Get index sets for faces, edges and vertices from graph */
5826: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5827: /* print some info */
5828: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5829: PetscInt nv;
5831: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5832: ISGetSize(ISForVertices,&nv);
5833: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5834: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5835: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5836: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5837: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5838: PetscViewerFlush(pcbddc->dbg_viewer);
5839: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5840: }
5842: /* free unneeded index sets */
5843: if (!pcbddc->use_vertices) {
5844: ISDestroy(&ISForVertices);
5845: }
5846: if (!pcbddc->use_edges) {
5847: for (i=0;i<n_ISForEdges;i++) {
5848: ISDestroy(&ISForEdges[i]);
5849: }
5850: PetscFree(ISForEdges);
5851: n_ISForEdges = 0;
5852: }
5853: if (!pcbddc->use_faces) {
5854: for (i=0;i<n_ISForFaces;i++) {
5855: ISDestroy(&ISForFaces[i]);
5856: }
5857: PetscFree(ISForFaces);
5858: n_ISForFaces = 0;
5859: }
5861: /* check if near null space is attached to global mat */
5862: MatGetNearNullSpace(pc->pmat,&nearnullsp);
5863: if (nearnullsp) {
5864: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5865: /* remove any stored info */
5866: MatNullSpaceDestroy(&pcbddc->onearnullspace);
5867: PetscFree(pcbddc->onearnullvecs_state);
5868: /* store information for BDDC solver reuse */
5869: PetscObjectReference((PetscObject)nearnullsp);
5870: pcbddc->onearnullspace = nearnullsp;
5871: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5872: for (i=0;i<nnsp_size;i++) {
5873: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5874: }
5875: } else { /* if near null space is not provided BDDC uses constants by default */
5876: nnsp_size = 0;
5877: nnsp_has_cnst = PETSC_TRUE;
5878: }
5879: /* get max number of constraints on a single cc */
5880: max_constraints = nnsp_size;
5881: if (nnsp_has_cnst) max_constraints++;
5883: /*
5884: Evaluate maximum storage size needed by the procedure
5885: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5886: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5887: There can be multiple constraints per connected component
5888: */
5889: n_vertices = 0;
5890: if (ISForVertices) {
5891: ISGetSize(ISForVertices,&n_vertices);
5892: }
5893: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5894: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
5896: total_counts = n_ISForFaces+n_ISForEdges;
5897: total_counts *= max_constraints;
5898: total_counts += n_vertices;
5899: PetscBTCreate(total_counts,&change_basis);
5901: total_counts = 0;
5902: max_size_of_constraint = 0;
5903: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5904: IS used_is;
5905: if (i<n_ISForEdges) {
5906: used_is = ISForEdges[i];
5907: } else {
5908: used_is = ISForFaces[i-n_ISForEdges];
5909: }
5910: ISGetSize(used_is,&j);
5911: total_counts += j;
5912: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5913: }
5914: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
5916: /* get local part of global near null space vectors */
5917: PetscMalloc1(nnsp_size,&localnearnullsp);
5918: for (k=0;k<nnsp_size;k++) {
5919: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5920: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5921: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5922: }
5924: /* whether or not to skip lapack calls */
5925: skip_lapack = PETSC_TRUE;
5926: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5928: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5929: if (!skip_lapack) {
5930: PetscScalar temp_work;
5932: #if defined(PETSC_MISSING_LAPACK_GESVD)
5933: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5934: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5935: PetscMalloc1(max_constraints,&singular_vals);
5936: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5937: #if defined(PETSC_USE_COMPLEX)
5938: PetscMalloc1(3*max_constraints,&rwork);
5939: #endif
5940: /* now we evaluate the optimal workspace using query with lwork=-1 */
5941: PetscBLASIntCast(max_constraints,&Blas_N);
5942: PetscBLASIntCast(max_constraints,&Blas_LDA);
5943: lwork = -1;
5944: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5945: #if !defined(PETSC_USE_COMPLEX)
5946: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5947: #else
5948: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5949: #endif
5950: PetscFPTrapPop();
5951: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5952: #else /* on missing GESVD */
5953: /* SVD */
5954: PetscInt max_n,min_n;
5955: max_n = max_size_of_constraint;
5956: min_n = max_constraints;
5957: if (max_size_of_constraint < max_constraints) {
5958: min_n = max_size_of_constraint;
5959: max_n = max_constraints;
5960: }
5961: PetscMalloc1(min_n,&singular_vals);
5962: #if defined(PETSC_USE_COMPLEX)
5963: PetscMalloc1(5*min_n,&rwork);
5964: #endif
5965: /* now we evaluate the optimal workspace using query with lwork=-1 */
5966: lwork = -1;
5967: PetscBLASIntCast(max_n,&Blas_M);
5968: PetscBLASIntCast(min_n,&Blas_N);
5969: PetscBLASIntCast(max_n,&Blas_LDA);
5970: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5971: #if !defined(PETSC_USE_COMPLEX)
5972: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5973: #else
5974: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5975: #endif
5976: PetscFPTrapPop();
5977: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5978: #endif /* on missing GESVD */
5979: /* Allocate optimal workspace */
5980: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5981: PetscMalloc1(lwork,&work);
5982: }
5983: /* Now we can loop on constraining sets */
5984: total_counts = 0;
5985: constraints_idxs_ptr[0] = 0;
5986: constraints_data_ptr[0] = 0;
5987: /* vertices */
5988: if (n_vertices) {
5989: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5990: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5991: for (i=0;i<n_vertices;i++) {
5992: constraints_n[total_counts] = 1;
5993: constraints_data[total_counts] = 1.0;
5994: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5995: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5996: total_counts++;
5997: }
5998: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5999: n_vertices = total_counts;
6000: }
6002: /* edges and faces */
6003: total_counts_cc = total_counts;
6004: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6005: IS used_is;
6006: PetscBool idxs_copied = PETSC_FALSE;
6008: if (ncc<n_ISForEdges) {
6009: used_is = ISForEdges[ncc];
6010: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6011: } else {
6012: used_is = ISForFaces[ncc-n_ISForEdges];
6013: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6014: }
6015: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6017: ISGetSize(used_is,&size_of_constraint);
6018: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6019: /* change of basis should not be performed on local periodic nodes */
6020: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6021: if (nnsp_has_cnst) {
6022: PetscScalar quad_value;
6024: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6025: idxs_copied = PETSC_TRUE;
6027: if (!pcbddc->use_nnsp_true) {
6028: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6029: } else {
6030: quad_value = 1.0;
6031: }
6032: for (j=0;j<size_of_constraint;j++) {
6033: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6034: }
6035: temp_constraints++;
6036: total_counts++;
6037: }
6038: for (k=0;k<nnsp_size;k++) {
6039: PetscReal real_value;
6040: PetscScalar *ptr_to_data;
6042: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6043: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6044: for (j=0;j<size_of_constraint;j++) {
6045: ptr_to_data[j] = array[is_indices[j]];
6046: }
6047: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6048: /* check if array is null on the connected component */
6049: PetscBLASIntCast(size_of_constraint,&Blas_N);
6050: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6051: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6052: temp_constraints++;
6053: total_counts++;
6054: if (!idxs_copied) {
6055: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6056: idxs_copied = PETSC_TRUE;
6057: }
6058: }
6059: }
6060: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6061: valid_constraints = temp_constraints;
6062: if (!pcbddc->use_nnsp_true && temp_constraints) {
6063: if (temp_constraints == 1) { /* just normalize the constraint */
6064: PetscScalar norm,*ptr_to_data;
6066: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6067: PetscBLASIntCast(size_of_constraint,&Blas_N);
6068: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6069: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6070: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6071: } else { /* perform SVD */
6072: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6074: #if defined(PETSC_MISSING_LAPACK_GESVD)
6075: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6076: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6077: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6078: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6079: from that computed using LAPACKgesvd
6080: -> This is due to a different computation of eigenvectors in LAPACKheev
6081: -> The quality of the POD-computed basis will be the same */
6082: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6083: /* Store upper triangular part of correlation matrix */
6084: PetscBLASIntCast(size_of_constraint,&Blas_N);
6085: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6086: for (j=0;j<temp_constraints;j++) {
6087: for (k=0;k<j+1;k++) {
6088: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6089: }
6090: }
6091: /* compute eigenvalues and eigenvectors of correlation matrix */
6092: PetscBLASIntCast(temp_constraints,&Blas_N);
6093: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6094: #if !defined(PETSC_USE_COMPLEX)
6095: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6096: #else
6097: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6098: #endif
6099: PetscFPTrapPop();
6100: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6101: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6102: j = 0;
6103: while (j < temp_constraints && singular_vals[j] < tol) j++;
6104: total_counts = total_counts-j;
6105: valid_constraints = temp_constraints-j;
6106: /* scale and copy POD basis into used quadrature memory */
6107: PetscBLASIntCast(size_of_constraint,&Blas_M);
6108: PetscBLASIntCast(temp_constraints,&Blas_N);
6109: PetscBLASIntCast(temp_constraints,&Blas_K);
6110: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6111: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6112: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6113: if (j<temp_constraints) {
6114: PetscInt ii;
6115: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6116: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6117: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6118: PetscFPTrapPop();
6119: for (k=0;k<temp_constraints-j;k++) {
6120: for (ii=0;ii<size_of_constraint;ii++) {
6121: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6122: }
6123: }
6124: }
6125: #else /* on missing GESVD */
6126: PetscBLASIntCast(size_of_constraint,&Blas_M);
6127: PetscBLASIntCast(temp_constraints,&Blas_N);
6128: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6129: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6130: #if !defined(PETSC_USE_COMPLEX)
6131: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6132: #else
6133: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6134: #endif
6135: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6136: PetscFPTrapPop();
6137: /* retain singular values greater than tol: note that LAPACKgesvd returns singular values in descending order */
6138: k = temp_constraints;
6139: if (k > size_of_constraint) k = size_of_constraint;
6140: j = 0;
6141: while (j < k && singular_vals[k-j-1] < tol) j++;
6142: valid_constraints = k-j;
6143: total_counts = total_counts-temp_constraints+valid_constraints;
6144: #endif /* on missing GESVD */
6145: }
6146: }
6147: /* update pointers information */
6148: if (valid_constraints) {
6149: constraints_n[total_counts_cc] = valid_constraints;
6150: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6151: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6152: /* set change_of_basis flag */
6153: if (boolforchange) {
6154: PetscBTSet(change_basis,total_counts_cc);
6155: }
6156: total_counts_cc++;
6157: }
6158: }
6159: /* free workspace */
6160: if (!skip_lapack) {
6161: PetscFree(work);
6162: #if defined(PETSC_USE_COMPLEX)
6163: PetscFree(rwork);
6164: #endif
6165: PetscFree(singular_vals);
6166: #if defined(PETSC_MISSING_LAPACK_GESVD)
6167: PetscFree(correlation_mat);
6168: PetscFree(temp_basis);
6169: #endif
6170: }
6171: for (k=0;k<nnsp_size;k++) {
6172: VecDestroy(&localnearnullsp[k]);
6173: }
6174: PetscFree(localnearnullsp);
6175: /* free index sets of faces, edges and vertices */
6176: for (i=0;i<n_ISForFaces;i++) {
6177: ISDestroy(&ISForFaces[i]);
6178: }
6179: if (n_ISForFaces) {
6180: PetscFree(ISForFaces);
6181: }
6182: for (i=0;i<n_ISForEdges;i++) {
6183: ISDestroy(&ISForEdges[i]);
6184: }
6185: if (n_ISForEdges) {
6186: PetscFree(ISForEdges);
6187: }
6188: ISDestroy(&ISForVertices);
6189: } else {
6190: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6192: total_counts = 0;
6193: n_vertices = 0;
6194: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6195: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6196: }
6197: max_constraints = 0;
6198: total_counts_cc = 0;
6199: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6200: total_counts += pcbddc->adaptive_constraints_n[i];
6201: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6202: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6203: }
6204: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6205: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6206: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6207: constraints_data = pcbddc->adaptive_constraints_data;
6208: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6209: PetscMalloc1(total_counts_cc,&constraints_n);
6210: total_counts_cc = 0;
6211: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6212: if (pcbddc->adaptive_constraints_n[i]) {
6213: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6214: }
6215: }
6216: #if 0
6217: printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
6218: for (i=0;i<total_counts_cc;i++) {
6219: printf("const %d, start %d",i,constraints_idxs_ptr[i]);
6220: printf(" end %d:\n",constraints_idxs_ptr[i+1]);
6221: for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
6222: printf(" %d",constraints_idxs[j]);
6223: }
6224: printf("\n");
6225: printf("number of cc: %d\n",constraints_n[i]);
6226: }
6227: for (i=0;i<n_vertices;i++) {
6228: PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
6229: }
6230: for (i=0;i<sub_schurs->n_subs;i++) {
6231: PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
6232: }
6233: #endif
6235: max_size_of_constraint = 0;
6236: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6237: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6238: /* Change of basis */
6239: PetscBTCreate(total_counts_cc,&change_basis);
6240: if (pcbddc->use_change_of_basis) {
6241: for (i=0;i<sub_schurs->n_subs;i++) {
6242: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6243: PetscBTSet(change_basis,i+n_vertices);
6244: }
6245: }
6246: }
6247: }
6248: pcbddc->local_primal_size = total_counts;
6249: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6251: /* map constraints_idxs in boundary numbering */
6252: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6253: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);
6255: /* Create constraint matrix */
6256: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6257: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6258: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6260: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6261: /* determine if a QR strategy is needed for change of basis */
6262: qr_needed = PETSC_FALSE;
6263: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6264: total_primal_vertices=0;
6265: pcbddc->local_primal_size_cc = 0;
6266: for (i=0;i<total_counts_cc;i++) {
6267: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6268: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6269: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6270: pcbddc->local_primal_size_cc += 1;
6271: } else if (PetscBTLookup(change_basis,i)) {
6272: for (k=0;k<constraints_n[i];k++) {
6273: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6274: }
6275: pcbddc->local_primal_size_cc += constraints_n[i];
6276: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6277: PetscBTSet(qr_needed_idx,i);
6278: qr_needed = PETSC_TRUE;
6279: }
6280: } else {
6281: pcbddc->local_primal_size_cc += 1;
6282: }
6283: }
6284: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6285: pcbddc->n_vertices = total_primal_vertices;
6286: /* permute indices in order to have a sorted set of vertices */
6287: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6288: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6289: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6290: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6292: /* nonzero structure of constraint matrix */
6293: /* and get reference dof for local constraints */
6294: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6295: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6297: j = total_primal_vertices;
6298: total_counts = total_primal_vertices;
6299: cum = total_primal_vertices;
6300: for (i=n_vertices;i<total_counts_cc;i++) {
6301: if (!PetscBTLookup(change_basis,i)) {
6302: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6303: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6304: cum++;
6305: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6306: for (k=0;k<constraints_n[i];k++) {
6307: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6308: nnz[j+k] = size_of_constraint;
6309: }
6310: j += constraints_n[i];
6311: }
6312: }
6313: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6314: PetscFree(nnz);
6316: /* set values in constraint matrix */
6317: for (i=0;i<total_primal_vertices;i++) {
6318: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6319: }
6320: total_counts = total_primal_vertices;
6321: for (i=n_vertices;i<total_counts_cc;i++) {
6322: if (!PetscBTLookup(change_basis,i)) {
6323: PetscInt *cols;
6325: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6326: cols = constraints_idxs+constraints_idxs_ptr[i];
6327: for (k=0;k<constraints_n[i];k++) {
6328: PetscInt row = total_counts+k;
6329: PetscScalar *vals;
6331: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6332: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6333: }
6334: total_counts += constraints_n[i];
6335: }
6336: }
6337: /* assembling */
6338: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6339: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6340: MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);
6341: MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);
6342: MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");
6344: /*
6345: PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
6346: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
6347: PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
6348: */
6349: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6350: if (pcbddc->use_change_of_basis) {
6351: /* dual and primal dofs on a single cc */
6352: PetscInt dual_dofs,primal_dofs;
6353: /* working stuff for GEQRF */
6354: PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
6355: PetscBLASInt lqr_work;
6356: /* working stuff for UNGQR */
6357: PetscScalar *gqr_work,lgqr_work_t;
6358: PetscBLASInt lgqr_work;
6359: /* working stuff for TRTRS */
6360: PetscScalar *trs_rhs;
6361: PetscBLASInt Blas_NRHS;
6362: /* pointers for values insertion into change of basis matrix */
6363: PetscInt *start_rows,*start_cols;
6364: PetscScalar *start_vals;
6365: /* working stuff for values insertion */
6366: PetscBT is_primal;
6367: PetscInt *aux_primal_numbering_B;
6368: /* matrix sizes */
6369: PetscInt global_size,local_size;
6370: /* temporary change of basis */
6371: Mat localChangeOfBasisMatrix;
6372: /* extra space for debugging */
6373: PetscScalar *dbg_work;
6375: /* local temporary change of basis acts on all local dofs -> dimension is n x n (n = pcis->n) */
6376: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6377: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6378: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6379: /* nonzeros for local mat */
6380: PetscMalloc1(pcis->n,&nnz);
6381: if (!pcbddc->benign_change || pcbddc->fake_change) {
6382: for (i=0;i<pcis->n;i++) nnz[i]=1;
6383: } else {
6384: const PetscInt *ii;
6385: PetscInt n;
6386: PetscBool flg_row;
6387: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6388: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6389: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6390: }
6391: for (i=n_vertices;i<total_counts_cc;i++) {
6392: if (PetscBTLookup(change_basis,i)) {
6393: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6394: if (PetscBTLookup(qr_needed_idx,i)) {
6395: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6396: } else {
6397: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6398: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6399: }
6400: }
6401: }
6402: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6403: PetscFree(nnz);
6404: /* Set interior change in the matrix */
6405: if (!pcbddc->benign_change || pcbddc->fake_change) {
6406: for (i=0;i<pcis->n;i++) {
6407: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6408: }
6409: } else {
6410: const PetscInt *ii,*jj;
6411: PetscScalar *aa;
6412: PetscInt n;
6413: PetscBool flg_row;
6414: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6415: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6416: for (i=0;i<n;i++) {
6417: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6418: }
6419: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6420: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6421: }
6423: if (pcbddc->dbg_flag) {
6424: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6425: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6426: }
6429: /* Now we loop on the constraints which need a change of basis */
6430: /*
6431: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6432: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6434: Basic blocks of change of basis matrix T computed by
6436: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6438: | 1 0 ... 0 s_1/S |
6439: | 0 1 ... 0 s_2/S |
6440: | ... |
6441: | 0 ... 1 s_{n-1}/S |
6442: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6444: with S = \sum_{i=1}^n s_i^2
6445: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6446: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6448: - QR decomposition of constraints otherwise
6449: */
6450: if (qr_needed) {
6451: /* space to store Q */
6452: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6453: /* array to store scaling factors for reflectors */
6454: PetscMalloc1(max_constraints,&qr_tau);
6455: /* first we issue queries for optimal work */
6456: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6457: PetscBLASIntCast(max_constraints,&Blas_N);
6458: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6459: lqr_work = -1;
6460: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6461: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6462: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6463: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6464: lgqr_work = -1;
6465: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6466: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6467: PetscBLASIntCast(max_constraints,&Blas_K);
6468: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6469: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6470: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6471: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6472: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6473: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6474: /* array to store rhs and solution of triangular solver */
6475: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6476: /* allocating workspace for check */
6477: if (pcbddc->dbg_flag) {
6478: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6479: }
6480: }
6481: /* array to store whether a node is primal or not */
6482: PetscBTCreate(pcis->n_B,&is_primal);
6483: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6484: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6485: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
6486: for (i=0;i<total_primal_vertices;i++) {
6487: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6488: }
6489: PetscFree(aux_primal_numbering_B);
6491: /* loop on constraints and see whether or not they need a change of basis and compute it */
6492: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6493: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6494: if (PetscBTLookup(change_basis,total_counts)) {
6495: /* get constraint info */
6496: primal_dofs = constraints_n[total_counts];
6497: dual_dofs = size_of_constraint-primal_dofs;
6499: if (pcbddc->dbg_flag) {
6500: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
6501: }
6503: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6505: /* copy quadrature constraints for change of basis check */
6506: if (pcbddc->dbg_flag) {
6507: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6508: }
6509: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6510: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6512: /* compute QR decomposition of constraints */
6513: PetscBLASIntCast(size_of_constraint,&Blas_M);
6514: PetscBLASIntCast(primal_dofs,&Blas_N);
6515: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6516: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6517: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6518: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6519: PetscFPTrapPop();
6521: /* explicitly compute R^-T */
6522: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6523: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6524: PetscBLASIntCast(primal_dofs,&Blas_N);
6525: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6526: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6527: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6528: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6529: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6530: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6531: PetscFPTrapPop();
6533: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6534: PetscBLASIntCast(size_of_constraint,&Blas_M);
6535: PetscBLASIntCast(size_of_constraint,&Blas_N);
6536: PetscBLASIntCast(primal_dofs,&Blas_K);
6537: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6538: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6539: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6540: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6541: PetscFPTrapPop();
6543: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6544: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6545: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6546: PetscBLASIntCast(size_of_constraint,&Blas_M);
6547: PetscBLASIntCast(primal_dofs,&Blas_N);
6548: PetscBLASIntCast(primal_dofs,&Blas_K);
6549: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6550: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6551: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6552: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6553: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6554: PetscFPTrapPop();
6555: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6557: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6558: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6559: /* insert cols for primal dofs */
6560: for (j=0;j<primal_dofs;j++) {
6561: start_vals = &qr_basis[j*size_of_constraint];
6562: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6563: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6564: }
6565: /* insert cols for dual dofs */
6566: for (j=0,k=0;j<dual_dofs;k++) {
6567: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6568: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6569: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6570: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6571: j++;
6572: }
6573: }
6575: /* check change of basis */
6576: if (pcbddc->dbg_flag) {
6577: PetscInt ii,jj;
6578: PetscBool valid_qr=PETSC_TRUE;
6579: PetscBLASIntCast(primal_dofs,&Blas_M);
6580: PetscBLASIntCast(size_of_constraint,&Blas_N);
6581: PetscBLASIntCast(size_of_constraint,&Blas_K);
6582: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6583: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6584: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6585: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6586: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6587: PetscFPTrapPop();
6588: for (jj=0;jj<size_of_constraint;jj++) {
6589: for (ii=0;ii<primal_dofs;ii++) {
6590: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6591: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6592: }
6593: }
6594: if (!valid_qr) {
6595: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6596: for (jj=0;jj<size_of_constraint;jj++) {
6597: for (ii=0;ii<primal_dofs;ii++) {
6598: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6599: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6600: }
6601: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6602: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6603: }
6604: }
6605: }
6606: } else {
6607: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6608: }
6609: }
6610: } else { /* simple transformation block */
6611: PetscInt row,col;
6612: PetscScalar val,norm;
6614: PetscBLASIntCast(size_of_constraint,&Blas_N);
6615: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6616: for (j=0;j<size_of_constraint;j++) {
6617: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6618: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6619: if (!PetscBTLookup(is_primal,row_B)) {
6620: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6621: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6622: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6623: } else {
6624: for (k=0;k<size_of_constraint;k++) {
6625: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6626: if (row != col) {
6627: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6628: } else {
6629: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6630: }
6631: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6632: }
6633: }
6634: }
6635: if (pcbddc->dbg_flag) {
6636: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6637: }
6638: }
6639: } else {
6640: if (pcbddc->dbg_flag) {
6641: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
6642: }
6643: }
6644: }
6646: /* free workspace */
6647: if (qr_needed) {
6648: if (pcbddc->dbg_flag) {
6649: PetscFree(dbg_work);
6650: }
6651: PetscFree(trs_rhs);
6652: PetscFree(qr_tau);
6653: PetscFree(qr_work);
6654: PetscFree(gqr_work);
6655: PetscFree(qr_basis);
6656: }
6657: PetscBTDestroy(&is_primal);
6658: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6659: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6661: /* assembling of global change of variable */
6662: if (!pcbddc->fake_change) {
6663: Mat tmat;
6664: PetscInt bs;
6666: VecGetSize(pcis->vec1_global,&global_size);
6667: VecGetLocalSize(pcis->vec1_global,&local_size);
6668: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6669: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6670: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6671: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6672: MatGetBlockSize(pc->pmat,&bs);
6673: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6674: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6675: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6676: MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6677: MatDestroy(&tmat);
6678: VecSet(pcis->vec1_global,0.0);
6679: VecSet(pcis->vec1_N,1.0);
6680: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6681: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6682: VecReciprocal(pcis->vec1_global);
6683: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6685: /* check */
6686: if (pcbddc->dbg_flag) {
6687: PetscReal error;
6688: Vec x,x_change;
6690: VecDuplicate(pcis->vec1_global,&x);
6691: VecDuplicate(pcis->vec1_global,&x_change);
6692: VecSetRandom(x,NULL);
6693: VecCopy(x,pcis->vec1_global);
6694: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6695: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6696: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6697: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6698: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6699: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6700: VecAXPY(x,-1.0,x_change);
6701: VecNorm(x,NORM_INFINITY,&error);
6702: if (error > PETSC_SMALL) {
6703: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error);
6704: }
6705: VecDestroy(&x);
6706: VecDestroy(&x_change);
6707: }
6708: /* adapt sub_schurs computed (if any) */
6709: if (pcbddc->use_deluxe_scaling) {
6710: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6712: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6713: if (sub_schurs && sub_schurs->S_Ej_all) {
6714: Mat S_new,tmat;
6715: IS is_all_N,is_V_Sall = NULL;
6717: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6718: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6719: if (pcbddc->deluxe_zerorows) {
6720: ISLocalToGlobalMapping NtoSall;
6721: IS is_V;
6722: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6723: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6724: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6725: ISLocalToGlobalMappingDestroy(&NtoSall);
6726: ISDestroy(&is_V);
6727: }
6728: ISDestroy(&is_all_N);
6729: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6730: MatDestroy(&sub_schurs->S_Ej_all);
6731: PetscObjectReference((PetscObject)S_new);
6732: if (pcbddc->deluxe_zerorows) {
6733: const PetscScalar *array;
6734: const PetscInt *idxs_V,*idxs_all;
6735: PetscInt i,n_V;
6737: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6738: ISGetLocalSize(is_V_Sall,&n_V);
6739: ISGetIndices(is_V_Sall,&idxs_V);
6740: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6741: VecGetArrayRead(pcis->D,&array);
6742: for (i=0;i<n_V;i++) {
6743: PetscScalar val;
6744: PetscInt idx;
6746: idx = idxs_V[i];
6747: val = array[idxs_all[idxs_V[i]]];
6748: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6749: }
6750: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6751: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6752: VecRestoreArrayRead(pcis->D,&array);
6753: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6754: ISRestoreIndices(is_V_Sall,&idxs_V);
6755: }
6756: sub_schurs->S_Ej_all = S_new;
6757: MatDestroy(&S_new);
6758: if (sub_schurs->sum_S_Ej_all) {
6759: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6760: MatDestroy(&sub_schurs->sum_S_Ej_all);
6761: PetscObjectReference((PetscObject)S_new);
6762: if (pcbddc->deluxe_zerorows) {
6763: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6764: }
6765: sub_schurs->sum_S_Ej_all = S_new;
6766: MatDestroy(&S_new);
6767: }
6768: ISDestroy(&is_V_Sall);
6769: MatDestroy(&tmat);
6770: }
6771: /* destroy any change of basis context in sub_schurs */
6772: if (sub_schurs && sub_schurs->change) {
6773: PetscInt i;
6775: for (i=0;i<sub_schurs->n_subs;i++) {
6776: KSPDestroy(&sub_schurs->change[i]);
6777: }
6778: PetscFree(sub_schurs->change);
6779: }
6780: }
6781: if (pcbddc->switch_static) { /* need to save the local change */
6782: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6783: } else {
6784: MatDestroy(&localChangeOfBasisMatrix);
6785: }
6786: /* determine if any process has changed the pressures locally */
6787: pcbddc->change_interior = pcbddc->benign_have_null;
6788: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6789: MatDestroy(&pcbddc->ConstraintMatrix);
6790: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6791: pcbddc->use_qr_single = qr_needed;
6792: }
6793: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6794: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6795: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6796: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6797: } else {
6798: Mat benign_global = NULL;
6799: if (pcbddc->benign_have_null) {
6800: Mat tmat;
6802: pcbddc->change_interior = PETSC_TRUE;
6803: VecSet(pcis->vec1_global,0.0);
6804: VecSet(pcis->vec1_N,1.0);
6805: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6806: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6807: VecReciprocal(pcis->vec1_global);
6808: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6809: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6810: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6811: if (pcbddc->benign_change) {
6812: Mat M;
6814: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6815: MatDiagonalScale(M,pcis->vec1_N,NULL);
6816: MatISSetLocalMat(tmat,M);
6817: MatDestroy(&M);
6818: } else {
6819: Mat eye;
6820: PetscScalar *array;
6822: VecGetArray(pcis->vec1_N,&array);
6823: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);
6824: for (i=0;i<pcis->n;i++) {
6825: MatSetValue(eye,i,i,array[i],INSERT_VALUES);
6826: }
6827: VecRestoreArray(pcis->vec1_N,&array);
6828: MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);
6829: MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);
6830: MatISSetLocalMat(tmat,eye);
6831: MatDestroy(&eye);
6832: }
6833: MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);
6834: MatDestroy(&tmat);
6835: }
6836: if (pcbddc->user_ChangeOfBasisMatrix) {
6837: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6838: MatDestroy(&benign_global);
6839: } else if (pcbddc->benign_have_null) {
6840: pcbddc->ChangeOfBasisMatrix = benign_global;
6841: }
6842: }
6843: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6844: IS is_global;
6845: const PetscInt *gidxs;
6847: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6848: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6849: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6850: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6851: ISDestroy(&is_global);
6852: }
6853: }
6854: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6855: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6856: }
6858: if (!pcbddc->fake_change) {
6859: /* add pressure dofs to set of primal nodes for numbering purposes */
6860: for (i=0;i<pcbddc->benign_n;i++) {
6861: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6862: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6863: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6864: pcbddc->local_primal_size_cc++;
6865: pcbddc->local_primal_size++;
6866: }
6868: /* check if a new primal space has been introduced (also take into account benign trick) */
6869: pcbddc->new_primal_space_local = PETSC_TRUE;
6870: if (olocal_primal_size == pcbddc->local_primal_size) {
6871: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6872: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6873: if (!pcbddc->new_primal_space_local) {
6874: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6875: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6876: }
6877: }
6878: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6879: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6880: }
6881: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
6883: /* flush dbg viewer */
6884: if (pcbddc->dbg_flag) {
6885: PetscViewerFlush(pcbddc->dbg_viewer);
6886: }
6888: /* free workspace */
6889: PetscBTDestroy(&qr_needed_idx);
6890: PetscBTDestroy(&change_basis);
6891: if (!pcbddc->adaptive_selection) {
6892: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6893: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6894: } else {
6895: PetscFree5(pcbddc->adaptive_constraints_n,
6896: pcbddc->adaptive_constraints_idxs_ptr,
6897: pcbddc->adaptive_constraints_data_ptr,
6898: pcbddc->adaptive_constraints_idxs,
6899: pcbddc->adaptive_constraints_data);
6900: PetscFree(constraints_n);
6901: PetscFree(constraints_idxs_B);
6902: }
6903: return(0);
6904: }
6905: /* #undef PETSC_MISSING_LAPACK_GESVD */
/*
  PCBDDCAnalyzeInterface - (re)builds the interface graph held in pcbddc->mat_graph
  and runs its connected components analysis.

  Input Parameter:
. pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

  Notes:
  When pcbddc->recompute_topography is set, the graph is reset, initialized from the
  local-to-global map of pc->pmat, optionally given a CSR adjacency taken from the
  local matrix matis->A, optionally given subdomain-local coordinates, and then set up.
  The connected components are computed only if pcbddc->graphanalyzed is false.
*/
6907: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6908: {
6909: ISLocalToGlobalMapping map;
6910: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
6911: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
6912: PetscInt i,N;
/* rcsr records that the block below asked matis->A for its row structure; it triggers the nvtxs_csr reset at the end */
6913: PetscBool rcsr = PETSC_FALSE;
6914: PetscErrorCode ierr;
6917: if (pcbddc->recompute_topography) {
6918: pcbddc->graphanalyzed = PETSC_FALSE;
6919: /* Reset previously computed graph */
6920: PCBDDCGraphReset(pcbddc->mat_graph);
6921: /* Init local Graph struct */
6922: MatGetSize(pc->pmat,&N,NULL);
6923: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6924: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
/* only a locally specified set of primal vertices (no global one) goes through the consistency check */
6926: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6927: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6928: }
6929: /* Check validity of the csr graph passed in by the user */
6930: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
6932: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6933: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6934: PetscInt *xadj,*adjncy;
6935: PetscInt nvtxs;
6936: PetscBool flg_row=PETSC_FALSE;
6938: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
/* the adjacency is copied (PETSC_COPY_VALUES) only when MatGetRowIJ reported success through flg_row */
6939: if (flg_row) {
6940: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6941: pcbddc->computed_rowadj = PETSC_TRUE;
6942: }
6943: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
/* set regardless of flg_row */
6944: rcsr = PETSC_TRUE;
6945: }
6946: if (pcbddc->dbg_flag) {
6947: PetscViewerFlush(pcbddc->dbg_viewer);
6948: }
/* coordinates are present (cdim != 0) but not yet flagged as subdomain-local (cloc): move them to the local numbering */
6950: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6951: PetscReal *lcoords;
6952: PetscInt n;
6953: MPI_Datatype dimrealtype;
6955: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
6956: MatGetLocalSize(matis->A,&n,NULL);
6957: MatISSetUpSF(pc->pmat);
6958: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
/* one datatype element = the cdim reals of a single point, so the SF broadcast moves whole coordinate tuples */
6959: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
6960: MPI_Type_commit(&dimrealtype);
6961: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6962: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6963: MPI_Type_free(&dimrealtype);
/* the previous coordinate array is released and replaced by the broadcast copy (n points) */
6964: PetscFree(pcbddc->mat_graph->coords);
6966: pcbddc->mat_graph->coords = lcoords;
6967: pcbddc->mat_graph->cloc = PETSC_TRUE;
6968: pcbddc->mat_graph->cnloc = n;
6969: }
6970: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
/* coordinates are marked active only while corner selection is requested and has not been performed yet */
6971: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
6973: /* Setup of Graph */
6974: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6975: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
6977: /* attach info on disconnected subdomains if present */
6978: if (pcbddc->n_local_subs) {
6979: PetscInt *local_subs;
/* NOTE(review): the array is sized with N (the value returned by MatGetSize on pc->pmat) while it is indexed by
   the entries of the pcbddc->local_subs index sets; entries not listed in any of those sets are left unset - confirm this is intended */
6981: PetscMalloc1(N,&local_subs);
6982: for (i=0;i<pcbddc->n_local_subs;i++) {
6983: const PetscInt *idxs;
6984: PetscInt nl,j;
6986: ISGetLocalSize(pcbddc->local_subs[i],&nl);
6987: ISGetIndices(pcbddc->local_subs[i],&idxs);
/* every index listed in the i-th set is labelled with i */
6988: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6989: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6990: }
/* the array pointer is stored in the graph; it is not freed in this function */
6991: pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6992: pcbddc->mat_graph->local_subs = local_subs;
6993: }
6994: }
6996: if (!pcbddc->graphanalyzed) {
6997: /* Graph's connected components analysis */
6998: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6999: pcbddc->graphanalyzed = PETSC_TRUE;
7000: }
/* clear the CSR vertex count whenever the row structure was requested from matis->A above */
7001: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7002: return(0);
7003: }
/*
  PCBDDCOrthonormalizeVecs - normalizes, in place, a set of vectors after removing from
  each one its components along the vectors that precede it in the array.

  Input Parameters:
+ n    - number of vectors; the routine returns immediately when n is 0
- vecs - the vectors, overwritten on output

  Notes:
  vecs[0] is only normalized. For each following vecs[i], the i inner products with
  vecs[0..i-1] are computed with a single VecMDot call and subtracted with a single
  VecMAXPY call before vecs[i] is normalized.
*/
7005: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7006: {
7007: PetscInt i,j;
7008: PetscScalar *alphas;
7012: if (!n) return(0);
/* workspace for the inner products; at most i < n of its entries are used per iteration */
7013: PetscMalloc1(n,&alphas);
7014: VecNormalize(vecs[0],NULL);
7015: for (i=1;i<n;i++) {
7016: VecMDot(vecs[i],i,vecs,alphas);
/* negate (and conjugate) the coefficients so that VecMAXPY performs a subtraction;
   NOTE(review): whether the conjugation is the right one for complex scalars depends on the VecMDot convention - confirm */
7017: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7018: VecMAXPY(vecs[i],i,alphas,vecs);
7019: VecNormalize(vecs[i],NULL);
7020: }
7021: PetscFree(alphas);
7022: return(0);
7023: }
/*
  PCBDDCMatISGetSubassemblingPattern - computes, for every process, the rank its local
  MATIS matrix should be sent to when the matrix is subassembled on fewer processes.

  Input Parameters:
+ mat          - the matrix; must be of type MATIS, otherwise an error is raised
. n_subdomains - requested number of target subdomains; must be positive
- redprocs     - when positive and smaller than the number of processes doing the partitioning,
                 the subdomain connectivity is assembled as an AIJ matrix whose rows live on the
                 first redprocs ranks before being partitioned

  Output Parameters:
+ n_subdomains - the number of subdomains actually used (may differ from the input, see below)
. is_sends     - parallel IS on the communicator of mat; its local size is 1 on processes with a
                 nonempty local matrix (the entry is the destination rank) and 0 on the others
- have_void    - (optional, may be NULL) PETSC_TRUE if some process has an empty local matrix

  Notes:
  Processes with an empty local matrix ("void" processes) are used as preferred destinations;
  when they exist, n_subdomains is limited to their number.
  If there are fewer active processes than n_subdomains, if n_subdomains is 1, or if the global
  size of mat does not exceed n_subdomains, no partitioning is performed and a trivial pattern
  is returned.
  The options -matis_partitioning_use_vwgt and -matis_partitioning_threshold are read from the
  options database; the threshold is never taken smaller than 2.
*/
7025: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7026: {
7027: Mat A;
7028: PetscInt n_neighs,*neighs,*n_shared,**shared;
7029: PetscMPIInt size,rank,color;
7030: PetscInt *xadj,*adjncy;
7031: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7032: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7033: PetscInt void_procs,*procs_candidates = NULL;
7034: PetscInt xadj_count,*count;
7035: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7036: PetscSubcomm psubcomm;
7037: MPI_Comm subcomm;
7042: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7043: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
/* *n_subdomains is a PetscInt: print it with %D, as the other PetscInt error messages of this file do */
7046: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D\n",*n_subdomains);
7048: if (have_void) *have_void = PETSC_FALSE;
7049: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7050: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7051: MatISGetLocalMat(mat,&A);
7052: MatGetLocalSize(A,&n,NULL);
/* a process is "active" when its local matrix has at least one row */
7053: im_active = !!n;
7054: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7055: void_procs = size - active_procs;
7056: /* get ranks of non-active processes in mat communicator */
7057: if (void_procs) {
7058: PetscInt ncand;
7060: if (have_void) *have_void = PETSC_TRUE;
7061: PetscMalloc1(size,&procs_candidates);
7062: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
/* compact in place: the gathered activity flags are overwritten, from the front, by the ranks of the void processes */
7063: for (i=0,ncand=0;i<size;i++) {
7064: if (!procs_candidates[i]) {
7065: procs_candidates[ncand++] = i;
7066: }
7067: }
7068: /* force n_subdomains to be not greater than the number of non-active processes */
7069: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7070: }
7072: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7073: number of subdomains requested 1 -> send to master or first candidate in voids */
7074: MatGetSize(mat,&N,NULL);
7075: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7076: PetscInt issize,isidx,dest;
7077: if (*n_subdomains == 1) dest = 0;
7078: else dest = rank;
7079: if (im_active) {
7080: issize = 1;
7081: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7082: isidx = procs_candidates[dest];
7083: } else {
7084: isidx = dest;
7085: }
7086: } else {
7087: issize = 0;
7088: isidx = -1;
7089: }
7090: if (*n_subdomains != 1) *n_subdomains = active_procs;
7091: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7092: PetscFree(procs_candidates);
7093: return(0);
7094: }
7095: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7096: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7097: threshold = PetscMax(threshold,2);
7099: /* Get info on mapping */
7100: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7102: /* build local CSR graph of subdomains' connectivity */
/* a single local vertex (this subdomain) with at most n_neighs-1 edges; entry 0 of the neighbor list is skipped
   in the loops below (presumably it is the process itself - confirm against ISLocalToGlobalMappingGetInfo) */
7103: PetscMalloc1(2,&xadj);
7104: xadj[0] = 0;
7105: xadj[1] = PetscMax(n_neighs-1,0);
7106: PetscMalloc1(xadj[1],&adjncy);
7107: PetscMalloc1(xadj[1],&adjncy_wgt);
/* count[k] = number of neighbors (entry 0 excluded) sharing local dof k */
7108: PetscCalloc1(n,&count);
7109: for (i=1;i<n_neighs;i++)
7110: for (j=0;j<n_shared[i];j++)
7111: count[shared[i][j]] += 1;
/* a neighbor becomes an edge as soon as it shares one dof whose count is below the threshold;
   the edge weight is the total number of dofs shared with that neighbor */
7113: xadj_count = 0;
7114: for (i=1;i<n_neighs;i++) {
7115: for (j=0;j<n_shared[i];j++) {
7116: if (count[shared[i][j]] < threshold) {
7117: adjncy[xadj_count] = neighs[i];
7118: adjncy_wgt[xadj_count] = n_shared[i];
7119: xadj_count++;
7120: break;
7121: }
7122: }
7123: }
7124: xadj[1] = xadj_count;
7125: PetscFree(count);
7126: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7127: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
/* single-entry result buffer; ownership is handed to is_sends at the end (PETSC_OWN_POINTER) */
7129: PetscMalloc1(1,&ranks_send_to_idx);
7131: /* Restrict work on active processes only */
7132: PetscMPIIntCast(im_active,&color);
7133: if (void_procs) {
7134: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7135: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7136: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7137: subcomm = PetscSubcommChild(psubcomm);
7138: } else {
7139: psubcomm = NULL;
7140: subcomm = PetscObjectComm((PetscObject)mat);
7141: }
7143: v_wgt = NULL;
/* void processes (color 0) take no part in the partitioning and only release their graph arrays */
7144: if (!color) {
7145: PetscFree(xadj);
7146: PetscFree(adjncy);
7147: PetscFree(adjncy_wgt);
7148: } else {
7149: Mat subdomain_adj;
7150: IS new_ranks,new_ranks_contig;
7151: MatPartitioning partitioner;
7152: PetscInt rstart=0,rend=0;
7153: PetscInt *is_indices,*oldranks;
/* shadows the outer size: from here on, size is the number of processes in subcomm */
7154: PetscMPIInt size;
7155: PetscBool aggregate;
7157: MPI_Comm_size(subcomm,&size);
7158: if (void_procs) {
7159: PetscInt prank = rank;
/* oldranks[r] = rank in the communicator of mat of the process having rank r in subcomm;
   the neighbor ranks stored in adjncy are translated to subcomm ranks by searching in it */
7160: PetscMalloc1(size,&oldranks);
7161: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7162: for (i=0;i<xadj[1];i++) {
7163: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7164: }
7165: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7166: } else {
7167: oldranks = NULL;
7168: }
7169: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7170: if (aggregate) { /* TODO: all this part could be made more efficient */
7171: PetscInt lrows,row,ncols,*cols;
7172: PetscMPIInt nrank;
7173: PetscScalar *vals;
7175: MPI_Comm_rank(subcomm,&nrank);
/* the size rows are split as evenly as possible among the first redprocs ranks; the other ranks own no rows */
7176: lrows = 0;
7177: if (nrank<redprocs) {
7178: lrows = size/redprocs;
7179: if (nrank<size%redprocs) lrows++;
7180: }
7181: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7182: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7183: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7184: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
/* every process inserts the row describing itself (row index = its rank in subcomm), wherever that row is owned */
7185: row = nrank;
7186: ncols = xadj[1]-xadj[0];
7187: cols = adjncy;
7188: PetscMalloc1(ncols,&vals);
7189: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7190: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7191: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7192: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7193: PetscFree(xadj);
7194: PetscFree(adjncy);
7195: PetscFree(adjncy_wgt);
7196: PetscFree(vals);
7197: if (use_vwgt) {
7198: Vec v;
7199: const PetscScalar *array;
7200: PetscInt nl;
/* vertex weights (local matrix sizes) are moved to the processes owning the rows through a vector assembly */
7202: MatCreateVecs(subdomain_adj,&v,NULL);
7203: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7204: VecAssemblyBegin(v);
7205: VecAssemblyEnd(v);
7206: VecGetLocalSize(v,&nl);
7207: VecGetArrayRead(v,&array);
7208: PetscMalloc1(nl,&v_wgt);
7209: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7210: VecRestoreArrayRead(v,&array);
7211: VecDestroy(&v);
7212: }
7213: } else {
/* xadj, adjncy and adjncy_wgt are not freed in this branch (presumably owned by the MPIAdj matrix from now on - confirm) */
7214: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7215: if (use_vwgt) {
7216: PetscMalloc1(1,&v_wgt);
7217: v_wgt[0] = n;
7218: }
7219: }
7220: /* MatView(subdomain_adj,0); */
7222: /* Partition */
7223: MatPartitioningCreate(subcomm,&partitioner);
7224: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7225: if (v_wgt) {
7226: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7227: }
7228: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7229: MatPartitioningSetNParts(partitioner,*n_subdomains);
7230: MatPartitioningSetFromOptions(partitioner);
7231: MatPartitioningApply(partitioner,&new_ranks);
7232: /* MatPartitioningView(partitioner,0); */
7234: /* renumber new_ranks to avoid "holes" in new set of processors */
7235: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7236: ISDestroy(&new_ranks);
7237: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7238: if (!aggregate) {
7239: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7240: #if defined(PETSC_USE_DEBUG)
7241: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7242: #endif
7243: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7244: } else if (oldranks) {
7245: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7246: } else {
7247: ranks_send_to_idx[0] = is_indices[0];
7248: }
7249: } else {
7250: PetscInt idx = 0;
7251: PetscMPIInt tag;
7252: MPI_Request *reqs;
/* the partition of row i is known by the process owning that row: send it to process i of subcomm,
   while every process receives the value computed for its own row */
7254: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7255: PetscMalloc1(rend-rstart,&reqs);
7256: for (i=rstart;i<rend;i++) {
7257: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7258: }
7259: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7260: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7261: PetscFree(reqs);
7262: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7263: #if defined(PETSC_USE_DEBUG)
7264: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7265: #endif
7266: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7267: } else if (oldranks) {
7268: ranks_send_to_idx[0] = oldranks[idx];
7269: } else {
7270: ranks_send_to_idx[0] = idx;
7271: }
7272: }
7273: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7274: /* clean up */
7275: PetscFree(oldranks);
7276: ISDestroy(&new_ranks_contig);
7277: MatDestroy(&subdomain_adj);
7278: MatPartitioningDestroy(&partitioner);
7279: }
7280: PetscSubcommDestroy(&psubcomm);
7281: PetscFree(procs_candidates);
7283: /* assemble parallel IS for sends */
/* one entry on active processes, none on void ones */
7284: i = 1;
7285: if (!color) i=0;
7286: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7287: return(0);
7288: }
/* Integer tags for the storage format of a local matrix (dense, AIJ, BAIJ, SBAIJ).
   PCBDDCMatISSubassemble writes the tag as the first entry of the index buffer it sends. */
7290: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
7292: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7293: {
7294: Mat local_mat;
7295: IS is_sends_internal;
7296: PetscInt rows,cols,new_local_rows;
7297: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7298: PetscBool ismatis,isdense,newisdense,destroy_mat;
7299: ISLocalToGlobalMapping l2gmap;
7300: PetscInt* l2gmap_indices;
7301: const PetscInt* is_indices;
7302: MatType new_local_type;
7303: /* buffers */
7304: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7305: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7306: PetscInt *recv_buffer_idxs_local;
7307: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7308: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7309: /* MPI */
7310: MPI_Comm comm,comm_n;
7311: PetscSubcomm subcomm;
7312: PetscMPIInt n_sends,n_recvs,commsize;
7313: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7314: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7315: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7316: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7317: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7318: PetscErrorCode ierr;
7322: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7323: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7330: if (nvecs) {
7331: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7333: }
7334: /* further checks */
7335: MatISGetLocalMat(mat,&local_mat);
7336: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7337: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7338: MatGetSize(local_mat,&rows,&cols);
7339: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7340: if (reuse && *mat_n) {
7341: PetscInt mrows,mcols,mnrows,mncols;
7343: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7344: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7345: MatGetSize(mat,&mrows,&mcols);
7346: MatGetSize(*mat_n,&mnrows,&mncols);
7347: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7348: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7349: }
7350: MatGetBlockSize(local_mat,&bs);
7353: /* prepare IS for sending if not provided */
7354: if (!is_sends) {
7355: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7356: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7357: } else {
7358: PetscObjectReference((PetscObject)is_sends);
7359: is_sends_internal = is_sends;
7360: }
7362: /* get comm */
7363: PetscObjectGetComm((PetscObject)mat,&comm);
7365: /* compute number of sends */
7366: ISGetLocalSize(is_sends_internal,&i);
7367: PetscMPIIntCast(i,&n_sends);
7369: /* compute number of receives */
7370: MPI_Comm_size(comm,&commsize);
7371: PetscMalloc1(commsize,&iflags);
7372: PetscMemzero(iflags,commsize*sizeof(*iflags));
7373: ISGetIndices(is_sends_internal,&is_indices);
7374: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7375: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7376: PetscFree(iflags);
7378: /* restrict comm if requested */
7379: subcomm = 0;
7380: destroy_mat = PETSC_FALSE;
7381: if (restrict_comm) {
7382: PetscMPIInt color,subcommsize;
7384: color = 0;
7385: if (restrict_full) {
7386: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7387: } else {
7388: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7389: }
7390: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7391: subcommsize = commsize - subcommsize;
7392: /* check if reuse has been requested */
7393: if (reuse) {
7394: if (*mat_n) {
7395: PetscMPIInt subcommsize2;
7396: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7397: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7398: comm_n = PetscObjectComm((PetscObject)*mat_n);
7399: } else {
7400: comm_n = PETSC_COMM_SELF;
7401: }
7402: } else { /* MAT_INITIAL_MATRIX */
7403: PetscMPIInt rank;
7405: MPI_Comm_rank(comm,&rank);
7406: PetscSubcommCreate(comm,&subcomm);
7407: PetscSubcommSetNumber(subcomm,2);
7408: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7409: comm_n = PetscSubcommChild(subcomm);
7410: }
7411: /* flag to destroy *mat_n on processes that do not participate in the new comm */
7412: if (color) destroy_mat = PETSC_TRUE;
7413: } else {
7414: comm_n = comm;
7415: }
7417: /* prepare send/receive buffers */
7418: PetscMalloc1(commsize,&ilengths_idxs);
7419: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
7420: PetscMalloc1(commsize,&ilengths_vals);
7421: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
7422: if (nis) {
7423: PetscCalloc1(commsize,&ilengths_idxs_is);
7424: }
7426: /* Get data from local matrices */
7427: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7428: /* TODO: See below some guidelines on how to prepare the local buffers */
7429: /*
7430: send_buffer_vals should contain the raw values of the local matrix
7431: send_buffer_idxs should contain:
7432: - MatType_PRIVATE type
7433: - PetscInt size_of_l2gmap
7434: - PetscInt global_row_indices[size_of_l2gmap]
7435: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7436: */
7437: else {
7438: MatDenseGetArray(local_mat,&send_buffer_vals);
7439: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7440: PetscMalloc1(i+2,&send_buffer_idxs);
7441: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7442: send_buffer_idxs[1] = i;
7443: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7444: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7445: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7446: PetscMPIIntCast(i,&len);
7447: for (i=0;i<n_sends;i++) {
7448: ilengths_vals[is_indices[i]] = len*len;
7449: ilengths_idxs[is_indices[i]] = len+2;
7450: }
7451: }
7452: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7453: /* additional is (if any) */
7454: if (nis) {
7455: PetscMPIInt psum;
7456: PetscInt j;
7457: for (j=0,psum=0;j<nis;j++) {
7458: PetscInt plen;
7459: ISGetLocalSize(isarray[j],&plen);
7460: PetscMPIIntCast(plen,&len);
7461: psum += len+1; /* indices + lenght */
7462: }
7463: PetscMalloc1(psum,&send_buffer_idxs_is);
7464: for (j=0,psum=0;j<nis;j++) {
7465: PetscInt plen;
7466: const PetscInt *is_array_idxs;
7467: ISGetLocalSize(isarray[j],&plen);
7468: send_buffer_idxs_is[psum] = plen;
7469: ISGetIndices(isarray[j],&is_array_idxs);
7470: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7471: ISRestoreIndices(isarray[j],&is_array_idxs);
7472: psum += plen+1; /* indices + lenght */
7473: }
7474: for (i=0;i<n_sends;i++) {
7475: ilengths_idxs_is[is_indices[i]] = psum;
7476: }
7477: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7478: }
7479: MatISRestoreLocalMat(mat,&local_mat);
7481: buf_size_idxs = 0;
7482: buf_size_vals = 0;
7483: buf_size_idxs_is = 0;
7484: buf_size_vecs = 0;
7485: for (i=0;i<n_recvs;i++) {
7486: buf_size_idxs += (PetscInt)olengths_idxs[i];
7487: buf_size_vals += (PetscInt)olengths_vals[i];
7488: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7489: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7490: }
7491: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7492: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7493: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7494: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7496: /* get new tags for clean communications */
7497: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7498: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7499: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7500: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7502: /* allocate for requests */
7503: PetscMalloc1(n_sends,&send_req_idxs);
7504: PetscMalloc1(n_sends,&send_req_vals);
7505: PetscMalloc1(n_sends,&send_req_idxs_is);
7506: PetscMalloc1(n_sends,&send_req_vecs);
7507: PetscMalloc1(n_recvs,&recv_req_idxs);
7508: PetscMalloc1(n_recvs,&recv_req_vals);
7509: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7510: PetscMalloc1(n_recvs,&recv_req_vecs);
7512: /* communications */
7513: ptr_idxs = recv_buffer_idxs;
7514: ptr_vals = recv_buffer_vals;
7515: ptr_idxs_is = recv_buffer_idxs_is;
7516: ptr_vecs = recv_buffer_vecs;
7517: for (i=0;i<n_recvs;i++) {
7518: source_dest = onodes[i];
7519: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7520: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7521: ptr_idxs += olengths_idxs[i];
7522: ptr_vals += olengths_vals[i];
7523: if (nis) {
7524: source_dest = onodes_is[i];
7525: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7526: ptr_idxs_is += olengths_idxs_is[i];
7527: }
7528: if (nvecs) {
7529: source_dest = onodes[i];
7530: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7531: ptr_vecs += olengths_idxs[i]-2;
7532: }
7533: }
7534: for (i=0;i<n_sends;i++) {
7535: PetscMPIIntCast(is_indices[i],&source_dest);
7536: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7537: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7538: if (nis) {
7539: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7540: }
7541: if (nvecs) {
7542: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7543: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7544: }
7545: }
7546: ISRestoreIndices(is_sends_internal,&is_indices);
7547: ISDestroy(&is_sends_internal);
7549: /* assemble new l2g map */
7550: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7551: ptr_idxs = recv_buffer_idxs;
7552: new_local_rows = 0;
7553: for (i=0;i<n_recvs;i++) {
7554: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7555: ptr_idxs += olengths_idxs[i];
7556: }
7557: PetscMalloc1(new_local_rows,&l2gmap_indices);
7558: ptr_idxs = recv_buffer_idxs;
7559: new_local_rows = 0;
7560: for (i=0;i<n_recvs;i++) {
7561: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7562: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7563: ptr_idxs += olengths_idxs[i];
7564: }
7565: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7566: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7567: PetscFree(l2gmap_indices);
7569: /* infer new local matrix type from received local matrices type */
7570: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7571: /* it also assumes that, if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7572: if (n_recvs) {
7573: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7574: ptr_idxs = recv_buffer_idxs;
7575: for (i=0;i<n_recvs;i++) {
7576: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7577: new_local_type_private = MATAIJ_PRIVATE;
7578: break;
7579: }
7580: ptr_idxs += olengths_idxs[i];
7581: }
7582: switch (new_local_type_private) {
7583: case MATDENSE_PRIVATE:
7584: new_local_type = MATSEQAIJ;
7585: bs = 1;
7586: break;
7587: case MATAIJ_PRIVATE:
7588: new_local_type = MATSEQAIJ;
7589: bs = 1;
7590: break;
7591: case MATBAIJ_PRIVATE:
7592: new_local_type = MATSEQBAIJ;
7593: break;
7594: case MATSBAIJ_PRIVATE:
7595: new_local_type = MATSEQSBAIJ;
7596: break;
7597: default:
7598: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7599: break;
7600: }
7601: } else { /* by default, new_local_type is seqaij */
7602: new_local_type = MATSEQAIJ;
7603: bs = 1;
7604: }
7606: /* create MATIS object if needed */
7607: if (!reuse) {
7608: MatGetSize(mat,&rows,&cols);
7609: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7610: } else {
7611: /* it also destroys the local matrices */
7612: if (*mat_n) {
7613: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7614: } else { /* this is a fake object */
7615: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7616: }
7617: }
7618: MatISGetLocalMat(*mat_n,&local_mat);
7619: MatSetType(local_mat,new_local_type);
7621: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7623: /* Global to local map of received indices */
7624: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7625: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7626: ISLocalToGlobalMappingDestroy(&l2gmap);
7628: /* restore attributes -> type of incoming data and its size */
7629: buf_size_idxs = 0;
7630: for (i=0;i<n_recvs;i++) {
7631: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7632: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7633: buf_size_idxs += (PetscInt)olengths_idxs[i];
7634: }
7635: PetscFree(recv_buffer_idxs);
7637: /* set preallocation */
7638: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7639: if (!newisdense) {
7640: PetscInt *new_local_nnz=0;
7642: ptr_idxs = recv_buffer_idxs_local;
7643: if (n_recvs) {
7644: PetscCalloc1(new_local_rows,&new_local_nnz);
7645: }
7646: for (i=0;i<n_recvs;i++) {
7647: PetscInt j;
7648: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7649: for (j=0;j<*(ptr_idxs+1);j++) {
7650: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7651: }
7652: } else {
7653: /* TODO */
7654: }
7655: ptr_idxs += olengths_idxs[i];
7656: }
7657: if (new_local_nnz) {
7658: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7659: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7660: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7661: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7662: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7663: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7664: } else {
7665: MatSetUp(local_mat);
7666: }
7667: PetscFree(new_local_nnz);
7668: } else {
7669: MatSetUp(local_mat);
7670: }
7672: /* set values */
7673: ptr_vals = recv_buffer_vals;
7674: ptr_idxs = recv_buffer_idxs_local;
7675: for (i=0;i<n_recvs;i++) {
7676: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7677: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7678: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7679: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7680: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7681: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7682: } else {
7683: /* TODO */
7684: }
7685: ptr_idxs += olengths_idxs[i];
7686: ptr_vals += olengths_vals[i];
7687: }
7688: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7689: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7690: MatISRestoreLocalMat(*mat_n,&local_mat);
7691: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7692: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7693: PetscFree(recv_buffer_vals);
7695: #if 0
7696: if (!restrict_comm) { /* check */
7697: Vec lvec,rvec;
7698: PetscReal infty_error;
7700: MatCreateVecs(mat,&rvec,&lvec);
7701: VecSetRandom(rvec,NULL);
7702: MatMult(mat,rvec,lvec);
7703: VecScale(lvec,-1.0);
7704: MatMultAdd(*mat_n,rvec,lvec,lvec);
7705: VecNorm(lvec,NORM_INFINITY,&infty_error);
7706: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7707: VecDestroy(&rvec);
7708: VecDestroy(&lvec);
7709: }
7710: #endif
7712: /* assemble new additional is (if any) */
7713: if (nis) {
7714: PetscInt **temp_idxs,*count_is,j,psum;
7716: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7717: PetscCalloc1(nis,&count_is);
7718: ptr_idxs = recv_buffer_idxs_is;
7719: psum = 0;
7720: for (i=0;i<n_recvs;i++) {
7721: for (j=0;j<nis;j++) {
7722: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7723: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7724: psum += plen;
7725: ptr_idxs += plen+1; /* shift pointer to received data */
7726: }
7727: }
7728: PetscMalloc1(nis,&temp_idxs);
7729: PetscMalloc1(psum,&temp_idxs[0]);
7730: for (i=1;i<nis;i++) {
7731: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7732: }
7733: PetscMemzero(count_is,nis*sizeof(PetscInt));
7734: ptr_idxs = recv_buffer_idxs_is;
7735: for (i=0;i<n_recvs;i++) {
7736: for (j=0;j<nis;j++) {
7737: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7738: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7739: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7740: ptr_idxs += plen+1; /* shift pointer to received data */
7741: }
7742: }
7743: for (i=0;i<nis;i++) {
7744: ISDestroy(&isarray[i]);
7745: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7746: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7747: }
7748: PetscFree(count_is);
7749: PetscFree(temp_idxs[0]);
7750: PetscFree(temp_idxs);
7751: }
7752: /* free workspace */
7753: PetscFree(recv_buffer_idxs_is);
7754: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7755: PetscFree(send_buffer_idxs);
7756: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7757: if (isdense) {
7758: MatISGetLocalMat(mat,&local_mat);
7759: MatDenseRestoreArray(local_mat,&send_buffer_vals);
7760: MatISRestoreLocalMat(mat,&local_mat);
7761: } else {
7762: /* PetscFree(send_buffer_vals); */
7763: }
7764: if (nis) {
7765: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7766: PetscFree(send_buffer_idxs_is);
7767: }
7769: if (nvecs) {
7770: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7771: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7772: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7773: VecDestroy(&nnsp_vec[0]);
7774: VecCreate(comm_n,&nnsp_vec[0]);
7775: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7776: VecSetType(nnsp_vec[0],VECSTANDARD);
7777: /* set values */
7778: ptr_vals = recv_buffer_vecs;
7779: ptr_idxs = recv_buffer_idxs_local;
7780: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7781: for (i=0;i<n_recvs;i++) {
7782: PetscInt j;
7783: for (j=0;j<*(ptr_idxs+1);j++) {
7784: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7785: }
7786: ptr_idxs += olengths_idxs[i];
7787: ptr_vals += olengths_idxs[i]-2;
7788: }
7789: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7790: VecAssemblyBegin(nnsp_vec[0]);
7791: VecAssemblyEnd(nnsp_vec[0]);
7792: }
7794: PetscFree(recv_buffer_vecs);
7795: PetscFree(recv_buffer_idxs_local);
7796: PetscFree(recv_req_idxs);
7797: PetscFree(recv_req_vals);
7798: PetscFree(recv_req_vecs);
7799: PetscFree(recv_req_idxs_is);
7800: PetscFree(send_req_idxs);
7801: PetscFree(send_req_vals);
7802: PetscFree(send_req_vecs);
7803: PetscFree(send_req_idxs_is);
7804: PetscFree(ilengths_vals);
7805: PetscFree(ilengths_idxs);
7806: PetscFree(olengths_vals);
7807: PetscFree(olengths_idxs);
7808: PetscFree(onodes);
7809: if (nis) {
7810: PetscFree(ilengths_idxs_is);
7811: PetscFree(olengths_idxs_is);
7812: PetscFree(onodes_is);
7813: }
7814: PetscSubcommDestroy(&subcomm);
7815: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
7816: MatDestroy(mat_n);
7817: for (i=0;i<nis;i++) {
7818: ISDestroy(&isarray[i]);
7819: }
7820: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7821: VecDestroy(&nnsp_vec[0]);
7822: }
7823: *mat_n = NULL;
7824: }
7825: return(0);
7826: }
7828: /* temporary hack into ksp private data structure */
7829: #include <petsc/private/kspimpl.h>
7831: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7832: {
7833: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7834: PC_IS *pcis = (PC_IS*)pc->data;
7835: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
7836: Mat coarsedivudotp = NULL;
7837: Mat coarseG,t_coarse_mat_is;
7838: MatNullSpace CoarseNullSpace = NULL;
7839: ISLocalToGlobalMapping coarse_islg;
7840: IS coarse_is,*isarray;
7841: PetscInt i,im_active=-1,active_procs=-1;
7842: PetscInt nis,nisdofs,nisneu,nisvert;
7843: PC pc_temp;
7844: PCType coarse_pc_type;
7845: KSPType coarse_ksp_type;
7846: PetscBool multilevel_requested,multilevel_allowed;
7847: PetscBool coarse_reuse;
7848: PetscInt ncoarse,nedcfield;
7849: PetscBool compute_vecs = PETSC_FALSE;
7850: PetscScalar *array;
7851: MatReuse coarse_mat_reuse;
7852: PetscBool restr, full_restr, have_void;
7853: PetscMPIInt commsize;
7854: PetscErrorCode ierr;
7857: /* Assign global numbering to coarse dofs */
7858: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7859: PetscInt ocoarse_size;
7860: compute_vecs = PETSC_TRUE;
7862: pcbddc->new_primal_space = PETSC_TRUE;
7863: ocoarse_size = pcbddc->coarse_size;
7864: PetscFree(pcbddc->global_primal_indices);
7865: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7866: /* see if we can avoid some work */
7867: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7868: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7869: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7870: KSPReset(pcbddc->coarse_ksp);
7871: coarse_reuse = PETSC_FALSE;
7872: } else { /* we can safely reuse already computed coarse matrix */
7873: coarse_reuse = PETSC_TRUE;
7874: }
7875: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7876: coarse_reuse = PETSC_FALSE;
7877: }
7878: /* reset any subassembling information */
7879: if (!coarse_reuse || pcbddc->recompute_topography) {
7880: ISDestroy(&pcbddc->coarse_subassembling);
7881: }
7882: } else { /* primal space is unchanged, so we can reuse coarse matrix */
7883: coarse_reuse = PETSC_TRUE;
7884: }
7885: /* assemble coarse matrix */
7886: if (coarse_reuse && pcbddc->coarse_ksp) {
7887: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7888: PetscObjectReference((PetscObject)coarse_mat);
7889: coarse_mat_reuse = MAT_REUSE_MATRIX;
7890: } else {
7891: coarse_mat = NULL;
7892: coarse_mat_reuse = MAT_INITIAL_MATRIX;
7893: }
7895: /* creates temporary l2gmap and IS for coarse indexes */
7896: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7897: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
7899: /* creates temporary MATIS object for coarse matrix */
7900: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7901: MatDenseGetArray(coarse_submat_dense,&array);
7902: PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7903: MatDenseRestoreArray(coarse_submat_dense,&array);
7904: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7905: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7906: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7907: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7908: MatDestroy(&coarse_submat_dense);
7910: /* count "active" (i.e. with positive local size) and "void" processes */
7911: im_active = !!(pcis->n);
7912: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7914: /* determine the number of processes participating in the coarse solver and compute the subassembling pattern */
7915: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7916: /* full_restr : just use the receivers from the subassembling pattern */
7917: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);
7918: coarse_mat_is = NULL;
7919: multilevel_allowed = PETSC_FALSE;
7920: multilevel_requested = PETSC_FALSE;
7921: pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7922: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7923: if (multilevel_requested) {
7924: ncoarse = active_procs/pcbddc->coarsening_ratio;
7925: restr = PETSC_FALSE;
7926: full_restr = PETSC_FALSE;
7927: } else {
7928: ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc;
7929: restr = PETSC_TRUE;
7930: full_restr = PETSC_TRUE;
7931: }
7932: if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7933: ncoarse = PetscMax(1,ncoarse);
7934: if (!pcbddc->coarse_subassembling) {
7935: if (pcbddc->coarsening_ratio > 1) {
7936: if (multilevel_requested) {
7937: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7938: } else {
7939: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7940: }
7941: } else {
7942: PetscMPIInt rank;
7943: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7944: have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE;
7945: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7946: }
7947: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7948: PetscInt psum;
7949: if (pcbddc->coarse_ksp) psum = 1;
7950: else psum = 0;
7951: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7952: if (ncoarse < commsize) have_void = PETSC_TRUE;
7953: }
7954: /* determine if we can go multilevel */
7955: if (multilevel_requested) {
7956: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7957: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7958: }
7959: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7961: /* dump subassembling pattern */
7962: if (pcbddc->dbg_flag && multilevel_allowed) {
7963: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7964: }
7966: /* compute dofs splitting and neumann boundaries for coarse dofs */
7967: nedcfield = -1;
7968: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7969: PetscInt *tidxs,*tidxs2,nout,tsize,i;
7970: const PetscInt *idxs;
7971: ISLocalToGlobalMapping tmap;
7973: /* create map between primal indices (in local representative ordering) and local primal numbering */
7974: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7975: /* allocate space for temporary storage */
7976: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7977: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7978: /* allocate for IS array */
7979: nisdofs = pcbddc->n_ISForDofsLocal;
7980: if (pcbddc->nedclocal) {
7981: if (pcbddc->nedfield > -1) {
7982: nedcfield = pcbddc->nedfield;
7983: } else {
7984: nedcfield = 0;
7985: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs);
7986: nisdofs = 1;
7987: }
7988: }
7989: nisneu = !!pcbddc->NeumannBoundariesLocal;
7990: nisvert = 0; /* nisvert is not used */
7991: nis = nisdofs + nisneu + nisvert;
7992: PetscMalloc1(nis,&isarray);
7993: /* dofs splitting */
7994: for (i=0;i<nisdofs;i++) {
7995: /* ISView(pcbddc->ISForDofsLocal[i],0); */
7996: if (nedcfield != i) {
7997: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7998: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7999: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8000: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8001: } else {
8002: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8003: ISGetIndices(pcbddc->nedclocal,&idxs);
8004: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8005: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout);
8006: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8007: }
8008: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8009: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8010: /* ISView(isarray[i],0); */
8011: }
8012: /* neumann boundaries */
8013: if (pcbddc->NeumannBoundariesLocal) {
8014: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8015: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8016: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8017: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8018: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8019: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8020: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8021: /* ISView(isarray[nisdofs],0); */
8022: }
8023: /* free memory */
8024: PetscFree(tidxs);
8025: PetscFree(tidxs2);
8026: ISLocalToGlobalMappingDestroy(&tmap);
8027: } else {
8028: nis = 0;
8029: nisdofs = 0;
8030: nisneu = 0;
8031: nisvert = 0;
8032: isarray = NULL;
8033: }
8034: /* destroy no longer needed map */
8035: ISLocalToGlobalMappingDestroy(&coarse_islg);
8037: /* subassemble */
8038: if (multilevel_allowed) {
8039: Vec vp[1];
8040: PetscInt nvecs = 0;
8041: PetscBool reuse,reuser;
8043: if (coarse_mat) reuse = PETSC_TRUE;
8044: else reuse = PETSC_FALSE;
8045: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8046: vp[0] = NULL;
8047: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8048: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8049: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8050: VecSetType(vp[0],VECSTANDARD);
8051: nvecs = 1;
8053: if (pcbddc->divudotp) {
8054: Mat B,loc_divudotp;
8055: Vec v,p;
8056: IS dummy;
8057: PetscInt np;
8059: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8060: MatGetSize(loc_divudotp,&np,NULL);
8061: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8062: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8063: MatCreateVecs(B,&v,&p);
8064: VecSet(p,1.);
8065: MatMultTranspose(B,p,v);
8066: VecDestroy(&p);
8067: MatDestroy(&B);
8068: VecGetArray(vp[0],&array);
8069: VecPlaceArray(pcbddc->vec1_P,array);
8070: VecRestoreArray(vp[0],&array);
8071: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8072: VecResetArray(pcbddc->vec1_P);
8073: ISDestroy(&dummy);
8074: VecDestroy(&v);
8075: }
8076: }
8077: if (reuser) {
8078: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8079: } else {
8080: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8081: }
8082: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8083: PetscScalar *arraym,*arrayv;
8084: PetscInt nl;
8085: VecGetLocalSize(vp[0],&nl);
8086: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8087: MatDenseGetArray(coarsedivudotp,&arraym);
8088: VecGetArray(vp[0],&arrayv);
8089: PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8090: VecRestoreArray(vp[0],&arrayv);
8091: MatDenseRestoreArray(coarsedivudotp,&arraym);
8092: VecDestroy(&vp[0]);
8093: } else {
8094: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8095: }
8096: } else {
8097: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8098: }
8099: if (coarse_mat_is || coarse_mat) {
8100: PetscMPIInt size;
8101: MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);
8102: if (!multilevel_allowed) {
8103: MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
8104: } else {
8105: Mat A;
8107: /* if this matrix is present, it means we are not reusing the coarse matrix */
8108: if (coarse_mat_is) {
8109: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8110: PetscObjectReference((PetscObject)coarse_mat_is);
8111: coarse_mat = coarse_mat_is;
8112: }
8113: /* be sure we don't have MatSeqDENSE as local mat */
8114: MatISGetLocalMat(coarse_mat,&A);
8115: MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8116: }
8117: }
8118: MatDestroy(&t_coarse_mat_is);
8119: MatDestroy(&coarse_mat_is);
8121: /* create local to global scatters for coarse problem */
8122: if (compute_vecs) {
8123: PetscInt lrows;
8124: VecDestroy(&pcbddc->coarse_vec);
8125: if (coarse_mat) {
8126: MatGetLocalSize(coarse_mat,&lrows,NULL);
8127: } else {
8128: lrows = 0;
8129: }
8130: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8131: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8132: VecSetType(pcbddc->coarse_vec,VECSTANDARD);
8133: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8134: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8135: }
8136: ISDestroy(&coarse_is);
8138: /* set defaults for coarse KSP and PC */
8139: if (multilevel_allowed) {
8140: coarse_ksp_type = KSPRICHARDSON;
8141: coarse_pc_type = PCBDDC;
8142: } else {
8143: coarse_ksp_type = KSPPREONLY;
8144: coarse_pc_type = PCREDUNDANT;
8145: }
8147: /* print some info if requested */
8148: if (pcbddc->dbg_flag) {
8149: if (!multilevel_allowed) {
8150: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8151: if (multilevel_requested) {
8152: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8153: } else if (pcbddc->max_levels) {
8154: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
8155: }
8156: PetscViewerFlush(pcbddc->dbg_viewer);
8157: }
8158: }
8160: /* communicate coarse discrete gradient */
8161: coarseG = NULL;
8162: if (pcbddc->nedcG && multilevel_allowed) {
8163: MPI_Comm ccomm;
8164: if (coarse_mat) {
8165: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8166: } else {
8167: ccomm = MPI_COMM_NULL;
8168: }
8169: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8170: }
8172: /* create the coarse KSP object only once with defaults */
8173: if (coarse_mat) {
8174: PetscBool isredundant,isnn,isbddc;
8175: PetscViewer dbg_viewer = NULL;
8177: if (pcbddc->dbg_flag) {
8178: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8179: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8180: }
8181: if (!pcbddc->coarse_ksp) {
8182: char prefix[256],str_level[16];
8183: size_t len;
8185: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8186: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8187: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8188: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8189: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8190: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8191: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8192: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8193: /* TODO is this logic correct? should check for coarse_mat type */
8194: PCSetType(pc_temp,coarse_pc_type);
8195: /* prefix */
8196: PetscStrcpy(prefix,"");
8197: PetscStrcpy(str_level,"");
8198: if (!pcbddc->current_level) {
8199: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8200: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8201: } else {
8202: PetscStrlen(((PetscObject)pc)->prefix,&len);
8203: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8204: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8205: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8206: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8207: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8208: PetscStrlcat(prefix,str_level,sizeof(prefix));
8209: }
8210: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8211: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8212: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8213: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8214: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8215: /* allow user customization */
8216: KSPSetFromOptions(pcbddc->coarse_ksp);
8217: }
8218: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8219: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8220: if (nisdofs) {
8221: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8222: for (i=0;i<nisdofs;i++) {
8223: ISDestroy(&isarray[i]);
8224: }
8225: }
8226: if (nisneu) {
8227: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8228: ISDestroy(&isarray[nisdofs]);
8229: }
8230: if (nisvert) {
8231: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8232: ISDestroy(&isarray[nis-1]);
8233: }
8234: if (coarseG) {
8235: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8236: }
8238: /* get some info after set from options */
8239: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8240: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8241: if (isbddc && !multilevel_allowed) {
8242: PCSetType(pc_temp,coarse_pc_type);
8243: isbddc = PETSC_FALSE;
8244: }
8245: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8246: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8247: if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8248: PCSetType(pc_temp,PCBDDC);
8249: isbddc = PETSC_TRUE;
8250: }
8251: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8252: if (isredundant) {
8253: KSP inner_ksp;
8254: PC inner_pc;
8256: PCRedundantGetKSP(pc_temp,&inner_ksp);
8257: KSPGetPC(inner_ksp,&inner_pc);
8258: }
8260: /* parameters which miss an API */
8261: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8262: if (isbddc) {
8263: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8265: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8266: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8267: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8268: if (pcbddc_coarse->benign_saddle_point) {
8269: Mat coarsedivudotp_is;
8270: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8271: IS row,col;
8272: const PetscInt *gidxs;
8273: PetscInt n,st,M,N;
8275: MatGetSize(coarsedivudotp,&n,NULL);
8276: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8277: st = st-n;
8278: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8279: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8280: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8281: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8282: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8283: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8284: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8285: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8286: ISGetSize(row,&M);
8287: MatGetSize(coarse_mat,&N,NULL);
8288: ISDestroy(&row);
8289: ISDestroy(&col);
8290: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8291: MatSetType(coarsedivudotp_is,MATIS);
8292: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8293: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8294: ISLocalToGlobalMappingDestroy(&rl2g);
8295: ISLocalToGlobalMappingDestroy(&cl2g);
8296: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8297: MatDestroy(&coarsedivudotp);
8298: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8299: MatDestroy(&coarsedivudotp_is);
8300: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8301: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8302: }
8303: }
8305: /* propagate symmetry info of coarse matrix */
8306: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8307: if (pc->pmat->symmetric_set) {
8308: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8309: }
8310: if (pc->pmat->hermitian_set) {
8311: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8312: }
8313: if (pc->pmat->spd_set) {
8314: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8315: }
8316: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8317: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8318: }
8319: /* set operators */
8320: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8321: if (pcbddc->dbg_flag) {
8322: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8323: }
8324: }
8325: MatDestroy(&coarseG);
8326: PetscFree(isarray);
8327: #if 0
8328: {
8329: PetscViewer viewer;
8330: char filename[256];
8331: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8332: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8333: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8334: MatView(coarse_mat,viewer);
8335: PetscViewerPopFormat(viewer);
8336: PetscViewerDestroy(&viewer);
8337: }
8338: #endif
8340: if (pcbddc->coarse_ksp) {
8341: Vec crhs,csol;
8343: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8344: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8345: if (!csol) {
8346: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8347: }
8348: if (!crhs) {
8349: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8350: }
8351: }
8352: MatDestroy(&coarsedivudotp);
8354: /* compute null space for coarse solver if the benign trick has been requested */
8355: if (pcbddc->benign_null) {
8357: VecSet(pcbddc->vec1_P,0.);
8358: for (i=0;i<pcbddc->benign_n;i++) {
8359: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8360: }
8361: VecAssemblyBegin(pcbddc->vec1_P);
8362: VecAssemblyEnd(pcbddc->vec1_P);
8363: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8364: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8365: if (coarse_mat) {
8366: Vec nullv;
8367: PetscScalar *array,*array2;
8368: PetscInt nl;
8370: MatCreateVecs(coarse_mat,&nullv,NULL);
8371: VecGetLocalSize(nullv,&nl);
8372: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8373: VecGetArray(nullv,&array2);
8374: PetscMemcpy(array2,array,nl*sizeof(*array));
8375: VecRestoreArray(nullv,&array2);
8376: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8377: VecNormalize(nullv,NULL);
8378: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8379: VecDestroy(&nullv);
8380: }
8381: }
8383: if (pcbddc->coarse_ksp) {
8384: PetscBool ispreonly;
8386: if (CoarseNullSpace) {
8387: PetscBool isnull;
8388: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8389: if (isnull) {
8390: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8391: }
8392: /* TODO: add local nullspaces (if any) */
8393: }
8394: /* setup coarse ksp */
8395: KSPSetUp(pcbddc->coarse_ksp);
8396: /* Check coarse problem if in debug mode or if solving with an iterative method */
8397: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8398: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8399: KSP check_ksp;
8400: KSPType check_ksp_type;
8401: PC check_pc;
8402: Vec check_vec,coarse_vec;
8403: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8404: PetscInt its;
8405: PetscBool compute_eigs;
8406: PetscReal *eigs_r,*eigs_c;
8407: PetscInt neigs;
8408: const char *prefix;
8410: /* Create ksp object suitable for estimation of extreme eigenvalues */
8411: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8412: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8413: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8414: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8415: /* prevent from setup unneeded object */
8416: KSPGetPC(check_ksp,&check_pc);
8417: PCSetType(check_pc,PCNONE);
8418: if (ispreonly) {
8419: check_ksp_type = KSPPREONLY;
8420: compute_eigs = PETSC_FALSE;
8421: } else {
8422: check_ksp_type = KSPGMRES;
8423: compute_eigs = PETSC_TRUE;
8424: }
8425: KSPSetType(check_ksp,check_ksp_type);
8426: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8427: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8428: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8429: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8430: KSPSetOptionsPrefix(check_ksp,prefix);
8431: KSPAppendOptionsPrefix(check_ksp,"check_");
8432: KSPSetFromOptions(check_ksp);
8433: KSPSetUp(check_ksp);
8434: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8435: KSPSetPC(check_ksp,check_pc);
8436: /* create random vec */
8437: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8438: VecSetRandom(check_vec,NULL);
8439: MatMult(coarse_mat,check_vec,coarse_vec);
8440: /* solve coarse problem */
8441: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8442: /* set eigenvalue estimation if preonly has not been requested */
8443: if (compute_eigs) {
8444: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8445: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8446: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8447: if (neigs) {
8448: lambda_max = eigs_r[neigs-1];
8449: lambda_min = eigs_r[0];
8450: if (pcbddc->use_coarse_estimates) {
8451: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8452: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8453: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8454: }
8455: }
8456: }
8457: }
8459: /* check coarse problem residual error */
8460: if (pcbddc->dbg_flag) {
8461: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8462: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8463: VecAXPY(check_vec,-1.0,coarse_vec);
8464: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8465: MatMult(coarse_mat,check_vec,coarse_vec);
8466: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8467: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8468: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8469: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8470: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8471: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8472: if (CoarseNullSpace) {
8473: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8474: }
8475: if (compute_eigs) {
8476: PetscReal lambda_max_s,lambda_min_s;
8477: KSPConvergedReason reason;
8478: KSPGetType(check_ksp,&check_ksp_type);
8479: KSPGetIterationNumber(check_ksp,&its);
8480: KSPGetConvergedReason(check_ksp,&reason);
8481: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8482: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8483: for (i=0;i<neigs;i++) {
8484: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8485: }
8486: }
8487: PetscViewerFlush(dbg_viewer);
8488: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8489: }
8490: VecDestroy(&check_vec);
8491: VecDestroy(&coarse_vec);
8492: KSPDestroy(&check_ksp);
8493: if (compute_eigs) {
8494: PetscFree(eigs_r);
8495: PetscFree(eigs_c);
8496: }
8497: }
8498: }
8499: MatNullSpaceDestroy(&CoarseNullSpace);
8500: /* print additional info */
8501: if (pcbddc->dbg_flag) {
8502: /* wait until all processes reach this point */
8503: PetscBarrier((PetscObject)pc);
8504: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
8505: PetscViewerFlush(pcbddc->dbg_viewer);
8506: }
8508: /* free memory */
8509: MatDestroy(&coarse_mat);
8510: return(0);
8511: }
/*
   PCBDDCComputePrimalNumbering - Computes a global (coarse) numbering for the local primal dofs.

   Input:
     pc - the preconditioner; its data is read both as PC_BDDC and as PC_IS, and
          pc->pmat->data is read as Mat_IS

   Output:
     coarse_size_n          - receives the global size returned by ISRenumber()
     local_primal_indices_n - receives an array of pcbddc->local_primal_size entries,
                              allocated here with PetscMalloc1(), holding the renumbered
                              indices (presumably the caller is expected to free it - confirm)

   Errors:
     PETSC_ERR_PLIB if local_primal_ref_node is missing while local_primal_size is nonzero,
     if the renumbered IS does not have local_primal_size entries, or (debug mode only) if
     the consistency check on shared primal dofs fails.

   When pcbddc->dbg_flag is set, a consistency check is run and reported on pcbddc->dbg_viewer.
*/
8513: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8514: {
8515: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8516: PC_IS* pcis = (PC_IS*)pc->data;
8517: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8518: IS subset,subset_mult,subset_n;
8519: PetscInt local_size,coarse_size=0;
8520: PetscInt *local_primal_indices=NULL;
8521: const PetscInt *t_local_primal_indices;
8525: /* Compute global number of coarse dofs */
8526: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
/* reference nodes (local numbering) -> global numbering through pcis->mapping */
8527: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8528: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8529: ISDestroy(&subset_n);
/* renumber the global reference nodes, using local_primal_ref_mult as multiplicity of each entry */
8530: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8531: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8532: ISDestroy(&subset);
8533: ISDestroy(&subset_mult);
8534: ISGetLocalSize(subset_n,&local_size);
8535: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
/* copy the renumbered indices into the array handed back to the caller */
8536: PetscMalloc1(local_size,&local_primal_indices);
8537: ISGetIndices(subset_n,&t_local_primal_indices);
8538: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8539: ISRestoreIndices(subset_n,&t_local_primal_indices);
8540: ISDestroy(&subset_n);
8542: /* check numbering */
8543: if (pcbddc->dbg_flag) {
8544: PetscScalar coarsesum,*array,*array2;
8545: PetscInt i;
8546: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8548: PetscViewerFlush(pcbddc->dbg_viewer);
8549: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8550: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8551: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8552: /* counter */
/* vec2_N[i] ends up holding the sum of the 1.0 contributions added to the global dof of local dof i */
8553: VecSet(pcis->vec1_global,0.0);
8554: VecSet(pcis->vec1_N,1.0);
8555: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8556: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8557: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8558: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
/* same counting restricted to the dofs listed in primal_indices_local_idxs: result goes back in vec1_N */
8559: VecSet(pcis->vec1_N,0.0);
8560: for (i=0;i<pcbddc->local_primal_size;i++) {
8561: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8562: }
8563: VecAssemblyBegin(pcis->vec1_N);
8564: VecAssemblyEnd(pcis->vec1_N);
8565: VecSet(pcis->vec1_global,0.0);
8566: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8567: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8568: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8569: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8570: VecGetArray(pcis->vec1_N,&array);
8571: VecGetArray(pcis->vec2_N,&array2);
/* a dof marked as primal by some contributor must be marked by all of them: the two counts must agree */
8572: for (i=0;i<pcis->n;i++) {
8573: if (array[i] != 0.0 && array[i] != array2[i]) {
8574: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8575: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8576: set_error = PETSC_TRUE;
8577: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
/* i, gi, owned and neigh are PetscInt: print them with %D (the rank is a plain int and keeps %04d) */
8578: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8579: }
8580: }
8581: VecRestoreArray(pcis->vec2_N,&array2);
/* the failure flag is reduced with a logical OR over the communicator of pc */
8582: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8583: PetscViewerFlush(pcbddc->dbg_viewer);
/* replace each positive count k with 1/k, then add everything up globally: the resulting sum
   is printed in parentheses next to coarse_size for comparison */
8584: for (i=0;i<pcis->n;i++) {
8585: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8586: }
8587: VecRestoreArray(pcis->vec1_N,&array);
8588: VecSet(pcis->vec1_global,0.0);
8589: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8590: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8591: VecSum(pcis->vec1_global,&coarsesum);
8592: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
/* detailed per-subdomain dump: only with a debug level above 1 or when the check failed */
8593: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8594: PetscInt *gidxs;
8596: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8597: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8598: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8599: PetscViewerFlush(pcbddc->dbg_viewer);
8600: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8601: for (i=0;i<pcbddc->local_primal_size;i++) {
8602: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8603: }
8604: PetscViewerFlush(pcbddc->dbg_viewer);
8605: PetscFree(gidxs);
8606: }
8607: PetscViewerFlush(pcbddc->dbg_viewer);
/* balance the PetscViewerASCIIPushSynchronized() issued at the beginning of the check */
8608: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8609: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8610: }
8611: /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
8612: /* get back data */
8613: *coarse_size_n = coarse_size;
8614: *local_primal_indices_n = local_primal_indices;
8615: return(0);
8616: }
/*
   PCBDDCGlobalToLocal - Builds an index set in local ordering from one given in global ordering.

   Input:
     g2l_ctx  - scatter used in SCATTER_FORWARD mode from gwork to lwork
     gwork    - global work vector (overwritten)
     lwork    - local work vector (overwritten)
     globalis - index set whose indices are used to address gwork

   Output:
     localis  - new index set, created on the communicator of gwork, listing in increasing
                order the positions of lwork that received a value larger than 0.5

   The index array of the new IS is handed over with PETSC_OWN_POINTER, so it is not freed here.
*/
8618: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8619: {
8620: IS localis_t;
8621: PetscInt i,lsize,*idxs,n;
8622: PetscScalar *vals;
8626: /* get indices in local ordering exploiting local to global map */
8627: ISGetLocalSize(globalis,&lsize);
/* vals: temporary array of ones used to flag the requested global entries */
8628: PetscMalloc1(lsize,&vals);
8629: for (i=0;i<lsize;i++) vals[i] = 1.0;
8630: ISGetIndices(globalis,(const PetscInt**)&idxs);
8631: VecSet(gwork,0.0);
8632: VecSet(lwork,0.0);
8633: if (idxs) { /* multilevel guard */
/* negative indices in globalis are skipped instead of raising an error */
8634: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8635: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8636: }
/* indices and values are released between VecAssemblyBegin() and VecAssemblyEnd() */
8637: VecAssemblyBegin(gwork);
8638: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8639: PetscFree(vals);
8640: VecAssemblyEnd(gwork);
8641: /* now compute set in local ordering */
8642: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8643: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
/* from here on vals is reused as a read-only view of the entries of lwork */
8644: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
/* NOTE(review): the loop bound is the global size of lwork; this matches the array length only
   if lwork is a sequential vector - confirm against callers */
8645: VecGetSize(lwork,&n);
/* first pass: count the flagged entries (lsize is reused as the counter) */
8646: for (i=0,lsize=0;i<n;i++) {
8647: if (PetscRealPart(vals[i]) > 0.5) {
8648: lsize++;
8649: }
8650: }
/* second pass: store the positions of the flagged entries */
8651: PetscMalloc1(lsize,&idxs);
8652: for (i=0,lsize=0;i<n;i++) {
8653: if (PetscRealPart(vals[i]) > 0.5) {
8654: idxs[lsize++] = i;
8655: }
8656: }
8657: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8658: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8659: *localis = localis_t;
8660: return(0);
8661: }
/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs through PCBDDCSubSchursSetUp().

   Steps performed here:
     1. select the adjacency (used_xadj,used_adjncy) handed to PCBDDCSubSchursSetUp();
     2. create the Schur complement object S_j from the A_II, A_IB, A_BI, A_BB blocks stored in PC_IS;
     3. call PCBDDCSubSchursSetUp(), either without explicit Schur complements (reusing pcbddc->ksp_D)
        or with explicit ones; in the latter case the local matrix is converted to MATSEQAIJ if needed
        and a change of basis may be computed through a temporary PCBDDC object.

   Input:
     pc - the preconditioner; its data is read both as PC_IS and as PC_BDDC

   Side effects visible in this routine: pcbddc->sub_schurs_layers may be reset to -1,
   pcbddc->local_mat may be replaced by a MATSEQAIJ conversion, sub_schurs->change_with_qr may be set.
*/
8663: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8664: {
8665: PC_IS *pcis=(PC_IS*)pc->data;
8666: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8667: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
8668: Mat S_j;
8669: PetscInt *used_xadj,*used_adjncy;
8670: PetscBool free_used_adj;
8671: PetscErrorCode ierr;
8674: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8675: free_used_adj = PETSC_FALSE;
8676: if (pcbddc->sub_schurs_layers == -1) {
/* -1 layers: no adjacency is passed */
8677: used_xadj = NULL;
8678: used_adjncy = NULL;
8679: } else {
/* the first two branches both borrow the arrays stored in mat_graph (not freed here) */
8680: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8681: used_xadj = pcbddc->mat_graph->xadj;
8682: used_adjncy = pcbddc->mat_graph->adjncy;
8683: } else if (pcbddc->computed_rowadj) {
8684: used_xadj = pcbddc->mat_graph->xadj;
8685: used_adjncy = pcbddc->mat_graph->adjncy;
8686: } else {
/* otherwise take a private copy of the row structure of local_mat; flg_row tells whether it was available */
8687: PetscBool flg_row=PETSC_FALSE;
8688: const PetscInt *xadj,*adjncy;
8689: PetscInt nvtxs;
8691: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8692: if (flg_row) {
8693: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8694: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8695: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8696: free_used_adj = PETSC_TRUE;
8697: } else {
/* row structure not available: fall back to the no-adjacency setting */
8698: pcbddc->sub_schurs_layers = -1;
8699: used_xadj = NULL;
8700: used_adjncy = NULL;
8701: }
8702: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8703: }
8704: }
8706: /* setup sub_schurs data */
8707: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8708: if (!sub_schurs->schur_explicit) {
8709: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8710: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8711: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8712: } else {
8713: Mat change = NULL;
8714: Vec scaling = NULL;
8715: IS change_primal = NULL, iP;
8716: PetscInt benign_n;
8717: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8718: PetscBool isseqaij,need_change = PETSC_FALSE;
8719: PetscBool discrete_harmonic = PETSC_FALSE;
/* when vertices are not used, solvers are reused only if sub_schurs->is_vertices is empty */
8721: if (!pcbddc->use_vertices && reuse_solvers) {
8722: PetscInt n_vertices;
8724: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8725: reuse_solvers = (PetscBool)!n_vertices;
8726: }
/* make sure pcbddc->local_mat is MATSEQAIJ: if it is the same object stored in the MATIS matrix,
   drop the reference and convert into a new matrix; otherwise convert in place */
8727: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8728: if (!isseqaij) {
8729: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8730: if (matis->A == pcbddc->local_mat) {
8731: MatDestroy(&pcbddc->local_mat);
8732: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8733: } else {
8734: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8735: }
8736: }
/* benign_n is forwarded only when the benign change is not applied explicitly */
8737: if (!pcbddc->benign_change_explicit) {
8738: benign_n = pcbddc->benign_n;
8739: } else {
8740: benign_n = 0;
8741: }
8742: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8743: We need a global reduction to avoid possible deadlocks.
8744: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8745: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
/* need_change becomes true only if no process in the communicator of pc already has sub_schurs->change */
8746: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8747: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8748: need_change = (PetscBool)(!need_change);
8749: }
8750: /* If the user defines additional constraints, we import them here.
8751: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8752: if (need_change) {
8753: PC_IS *pcisf;
8754: PC_BDDC *pcbddcf;
8755: PC pcf;
8757: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
/* temporary PCBDDC object sharing the operators of pc */
8758: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8759: PCSetOperators(pcf,pc->mat,pc->pmat);
8760: PCSetType(pcf,PCBDDC);
8762: /* hacks */
/* the temporary PC borrows (without taking references) the fields of pc it needs; both pcisf and
   pcbddcf are views of the same pcf->data pointer */
8763: pcisf = (PC_IS*)pcf->data;
8764: pcisf->is_B_local = pcis->is_B_local;
8765: pcisf->vec1_N = pcis->vec1_N;
8766: pcisf->BtoNmap = pcis->BtoNmap;
8767: pcisf->n = pcis->n;
8768: pcisf->n_B = pcis->n_B;
8769: pcbddcf = (PC_BDDC*)pcf->data;
/* the graph struct owned by the temporary PC is freed and replaced with the one of pc */
8770: PetscFree(pcbddcf->mat_graph);
8771: pcbddcf->mat_graph = pcbddc->mat_graph;
8772: pcbddcf->use_faces = PETSC_TRUE;
8773: pcbddcf->use_change_of_basis = PETSC_TRUE;
8774: pcbddcf->use_change_on_faces = PETSC_TRUE;
8775: pcbddcf->use_qr_single = PETSC_TRUE;
8776: pcbddcf->fake_change = PETSC_TRUE;
8778: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8779: PCBDDCConstraintsSetUp(pcf);
8780: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
/* change_primal: copy of the first n_vertices entries of local_primal_ref_node of the temporary PC */
8781: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
/* take over the constraint matrix: the pointer is moved into change and cleared in the temporary PC */
8782: change = pcbddcf->ConstraintMatrix;
8783: pcbddcf->ConstraintMatrix = NULL;
8785: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8786: PetscFree(pcbddcf->sub_schurs);
8787: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8788: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8789: PetscFree(pcbddcf->primal_indices_local_idxs);
8790: PetscFree(pcbddcf->onearnullvecs_state);
/* the data struct is released by hand and the destroy/reset callbacks are cleared before PCDestroy();
   presumably this prevents the borrowed objects from being destroyed twice - confirm */
8791: PetscFree(pcf->data);
8792: pcf->ops->destroy = NULL;
8793: pcf->ops->reset = NULL;
8794: PCDestroy(&pcf);
8795: }
/* pcis->D is passed as scaling only when deluxe scaling is not in use */
8796: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
/* iP is the object composed on pc under the key "__KSPFETIDP_iP" (NULL when absent); the
   discrete harmonic option is read only when it is present */
8798: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8799: if (iP) {
8800: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8801: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8802: PetscOptionsEnd();
8803: }
8804: if (discrete_harmonic) {
/* work on a copy of local_mat whose rows and columns listed in iP are zeroed (1.0 on the diagonal);
   iP is also composed on the copy before it is passed down */
8805: Mat A;
8806: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8807: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8808: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8809: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8810: MatDestroy(&A);
8811: } else {
8812: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8813: }
/* both are NULL (and the destroy calls no-ops) when no change of basis was computed above */
8814: MatDestroy(&change);
8815: ISDestroy(&change_primal);
8816: }
8817: MatDestroy(&S_j);
8819: /* free adjacency */
/* only the private copy obtained from MatGetRowIJ() is freed; arrays borrowed from mat_graph are left alone */
8820: if (free_used_adj) {
8821: PetscFree2(used_xadj,used_adjncy);
8822: }
8823: return(0);
8824: }
/*
   PCBDDCInitSubSchurs - Creates (if needed) and initializes pcbddc->sub_schurs.

   Input:
     pc - the preconditioner; its data is read both as PC_IS and as PC_BDDC

   The graph handed to PCBDDCSubSchursInit() is pcbddc->mat_graph, unless pcbddc->sub_schurs_rebuild
   is set: in that case a temporary graph is built here and destroyed before returning.
   When pcbddc->dbg_flag is set and no rebuild is requested, the graph and the number of local
   candidate vertices, edges and faces are printed on pcbddc->dbg_viewer.
*/
8826: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8827: {
8828: PC_IS *pcis=(PC_IS*)pc->data;
8829: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8830: PCBDDCGraph graph;
8831: PetscErrorCode ierr;
8834: /* attach interface graph for determining subsets */
8835: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8836: IS verticesIS,verticescomm;
8837: PetscInt vsize,*idxs;
/* copy the candidate vertices of mat_graph into an IS living on the communicator of pc */
8839: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8840: ISGetSize(verticesIS,&vsize);
8841: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8842: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8843: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8844: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
/* new graph with the same local-to-global map, global size and custom minimal size of mat_graph;
   the Dirichlet boundaries and the vertices IS are passed to its setup */
8845: PCBDDCGraphCreate(&graph);
8846: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8847: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8848: ISDestroy(&verticescomm);
8849: PCBDDCGraphComputeConnectedComponents(graph);
8850: } else {
8851: graph = pcbddc->mat_graph;
8852: }
8853: /* print some info */
8854: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8855: IS vertices;
8856: PetscInt nv,nedges,nfaces;
8857: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8858: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8859: ISGetSize(vertices,&nv);
8860: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8861: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
/* nv, nedges and nfaces are PetscInt and are printed with %D; the rank and the PetscBool flags keep %d */
8862: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02D local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
8863: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02D local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
8864: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02D local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
8865: PetscViewerFlush(pcbddc->dbg_viewer);
8866: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8867: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8868: }
8870: /* sub_schurs init */
/* the sub_schurs object is created only the first time and reused afterwards */
8871: if (!pcbddc->sub_schurs) {
8872: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8873: }
8874: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
8876: /* free graph struct */
/* only the temporary graph built above is destroyed; pcbddc->mat_graph is never destroyed here */
8877: if (pcbddc->sub_schurs_rebuild) {
8878: PCBDDCGraphDestroy(&graph);
8879: }
8880: return(0);
8881: }
8883: PetscErrorCode PCBDDCCheckOperator(PC pc)
8884: {
8885: PC_IS *pcis=(PC_IS*)pc->data;
8886: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8887: PetscErrorCode ierr;
8890: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8891: IS zerodiag = NULL;
8892: Mat S_j,B0_B=NULL;
8893: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
8894: PetscScalar *p0_check,*array,*array2;
8895: PetscReal norm;
8896: PetscInt i;
8898: /* B0 and B0_B */
8899: if (zerodiag) {
8900: IS dummy;
8902: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8903: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8904: MatCreateVecs(B0_B,NULL,&dummy_vec);
8905: ISDestroy(&dummy);
8906: }
8907: /* I need a primal vector to scale primal nodes since BDDC sums contributions */
8908: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8909: VecSet(pcbddc->vec1_P,1.0);
8910: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8911: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8912: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8913: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8914: VecReciprocal(vec_scale_P);
8915: /* S_j */
8916: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8917: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8919: /* mimic vector in \widetilde{W}_\Gamma */
8920: VecSetRandom(pcis->vec1_N,NULL);
8921: /* continuous in primal space */
8922: VecSetRandom(pcbddc->coarse_vec,NULL);
8923: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8924: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8925: VecGetArray(pcbddc->vec1_P,&array);
8926: PetscCalloc1(pcbddc->benign_n,&p0_check);
8927: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8928: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8929: VecRestoreArray(pcbddc->vec1_P,&array);
8930: VecAssemblyBegin(pcis->vec1_N);
8931: VecAssemblyEnd(pcis->vec1_N);
8932: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8933: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8934: VecDuplicate(pcis->vec2_B,&vec_check_B);
8935: VecCopy(pcis->vec2_B,vec_check_B);
8937: /* assemble rhs for coarse problem */
8938: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8939: /* local with Schur */
8940: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
8941: if (zerodiag) {
8942: VecGetArray(dummy_vec,&array);
8943: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8944: VecRestoreArray(dummy_vec,&array);
8945: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8946: }
8947: /* sum on primal nodes the local contributions */
8948: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8949: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8950: VecGetArray(pcis->vec1_N,&array);
8951: VecGetArray(pcbddc->vec1_P,&array2);
8952: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8953: VecRestoreArray(pcbddc->vec1_P,&array2);
8954: VecRestoreArray(pcis->vec1_N,&array);
8955: VecSet(pcbddc->coarse_vec,0.);
8956: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8957: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8958: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8959: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8960: VecGetArray(pcbddc->vec1_P,&array);
8961: /* scale primal nodes (BDDC sums contibutions) */
8962: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8963: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8964: VecRestoreArray(pcbddc->vec1_P,&array);
8965: VecAssemblyBegin(pcis->vec1_N);
8966: VecAssemblyEnd(pcis->vec1_N);
8967: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8968: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8969: /* global: \widetilde{B0}_B w_\Gamma */
8970: if (zerodiag) {
8971: MatMult(B0_B,pcis->vec2_B,dummy_vec);
8972: VecGetArray(dummy_vec,&array);
8973: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8974: VecRestoreArray(dummy_vec,&array);
8975: }
8976: /* BDDC */
8977: VecSet(pcis->vec1_D,0.);
8978: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
8980: VecCopy(pcis->vec1_B,pcis->vec2_B);
8981: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8982: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8983: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8984: for (i=0;i<pcbddc->benign_n;i++) {
8985: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8986: }
8987: PetscFree(p0_check);
8988: VecDestroy(&vec_scale_P);
8989: VecDestroy(&vec_check_B);
8990: VecDestroy(&dummy_vec);
8991: MatDestroy(&S_j);
8992: MatDestroy(&B0_B);
8993: }
8994: return(0);
8995: }
8997: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8998: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8999: {
9000: Mat At;
9001: IS rows;
9002: PetscInt rst,ren;
9004: PetscLayout rmap;
9007: rst = ren = 0;
9008: if (ccomm != MPI_COMM_NULL) {
9009: PetscLayoutCreate(ccomm,&rmap);
9010: PetscLayoutSetSize(rmap,A->rmap->N);
9011: PetscLayoutSetBlockSize(rmap,1);
9012: PetscLayoutSetUp(rmap);
9013: PetscLayoutGetRange(rmap,&rst,&ren);
9014: }
9015: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9016: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9017: ISDestroy(&rows);
9019: if (ccomm != MPI_COMM_NULL) {
9020: Mat_MPIAIJ *a,*b;
9021: IS from,to;
9022: Vec gvec;
9023: PetscInt lsize;
9025: MatCreate(ccomm,B);
9026: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9027: MatSetType(*B,MATAIJ);
9028: PetscLayoutDestroy(&((*B)->rmap));
9029: PetscLayoutSetUp((*B)->cmap);
9030: a = (Mat_MPIAIJ*)At->data;
9031: b = (Mat_MPIAIJ*)(*B)->data;
9032: MPI_Comm_size(ccomm,&b->size);
9033: MPI_Comm_rank(ccomm,&b->rank);
9034: PetscObjectReference((PetscObject)a->A);
9035: PetscObjectReference((PetscObject)a->B);
9036: b->A = a->A;
9037: b->B = a->B;
9039: b->donotstash = a->donotstash;
9040: b->roworiented = a->roworiented;
9041: b->rowindices = 0;
9042: b->rowvalues = 0;
9043: b->getrowactive = PETSC_FALSE;
9045: (*B)->rmap = rmap;
9046: (*B)->factortype = A->factortype;
9047: (*B)->assembled = PETSC_TRUE;
9048: (*B)->insertmode = NOT_SET_VALUES;
9049: (*B)->preallocated = PETSC_TRUE;
9051: if (a->colmap) {
9052: #if defined(PETSC_USE_CTABLE)
9053: PetscTableCreateCopy(a->colmap,&b->colmap);
9054: #else
9055: PetscMalloc1(At->cmap->N,&b->colmap);
9056: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9057: PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9058: #endif
9059: } else b->colmap = 0;
9060: if (a->garray) {
9061: PetscInt len;
9062: len = a->B->cmap->n;
9063: PetscMalloc1(len+1,&b->garray);
9064: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9065: if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9066: } else b->garray = 0;
9068: PetscObjectReference((PetscObject)a->lvec);
9069: b->lvec = a->lvec;
9070: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9072: /* cannot use VecScatterCopy */
9073: VecGetLocalSize(b->lvec,&lsize);
9074: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9075: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9076: MatCreateVecs(*B,&gvec,NULL);
9077: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9078: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9079: ISDestroy(&from);
9080: ISDestroy(&to);
9081: VecDestroy(&gvec);
9082: }
9083: MatDestroy(&At);
9084: return(0);
9085: }