Actual source code: bddcprivate.c
petsc-3.13.3 2020-07-01
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1;
21: PetscInt ulw,i,nr,nc,n;
25: MatGetSize(A,&nr,&nc);
26: if (!nr || !nc) return(0);
28: /* workspace */
29: if (!work) {
30: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
31: PetscMalloc1(ulw,&uwork);
32: } else {
33: ulw = lw;
34: uwork = work;
35: }
36: n = PetscMin(nr,nc);
37: if (!rwork) {
38: PetscMalloc1(n,&sing);
39: } else {
40: sing = rwork;
41: }
43: /* SVD */
44: PetscMalloc1(nr*nr,&U);
45: PetscBLASIntCast(nr,&bM);
46: PetscBLASIntCast(nc,&bN);
47: PetscBLASIntCast(ulw,&lwork);
48: MatDenseGetArray(A,&data);
49: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
50: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
51: PetscFPTrapPop();
52: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
53: MatDenseRestoreArray(A,&data);
54: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
55: if (!rwork) {
56: PetscFree(sing);
57: }
58: if (!work) {
59: PetscFree(uwork);
60: }
61: /* create B */
62: if (!range) {
63: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
64: MatDenseGetArray(*B,&data);
65: PetscArraycpy(data,U+nr*i,(nr-i)*nr);
66: } else {
67: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
68: MatDenseGetArray(*B,&data);
69: PetscArraycpy(data,U,i*nr);
70: }
71: MatDenseRestoreArray(*B,&data);
72: PetscFree(U);
73: #else /* PETSC_USE_COMPLEX */
75: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
76: #endif
77: return(0);
78: }
80: /* TODO REMOVE */
81: #if defined(PRINT_GDET)
82: static int inc = 0;
83: static int lev = 0;
84: #endif
86: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
87: {
89: Mat GE,GEd;
90: PetscInt rsize,csize,esize;
91: PetscScalar *ptr;
94: ISGetSize(edge,&esize);
95: if (!esize) return(0);
96: ISGetSize(extrow,&rsize);
97: ISGetSize(extcol,&csize);
99: /* gradients */
100: ptr = work + 5*esize;
101: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
102: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
103: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
104: MatDestroy(&GE);
106: /* constants */
107: ptr += rsize*csize;
108: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
109: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
110: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
111: MatDestroy(&GE);
112: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
113: MatDestroy(&GEd);
115: if (corners) {
116: Mat GEc;
117: const PetscScalar *vals;
118: PetscScalar v;
120: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
121: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
122: MatDenseGetArrayRead(GEd,&vals);
123: /* v = PetscAbsScalar(vals[0]) */;
124: v = 1.;
125: cvals[0] = vals[0]/v;
126: cvals[1] = vals[1]/v;
127: MatDenseRestoreArrayRead(GEd,&vals);
128: MatScale(*GKins,1./v);
129: #if defined(PRINT_GDET)
130: {
131: PetscViewer viewer;
132: char filename[256];
133: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
134: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
135: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
136: PetscObjectSetName((PetscObject)GEc,"GEc");
137: MatView(GEc,viewer);
138: PetscObjectSetName((PetscObject)(*GKins),"GK");
139: MatView(*GKins,viewer);
140: PetscObjectSetName((PetscObject)GEd,"Gproj");
141: MatView(GEd,viewer);
142: PetscViewerDestroy(&viewer);
143: }
144: #endif
145: MatDestroy(&GEd);
146: MatDestroy(&GEc);
147: }
149: return(0);
150: }
152: PetscErrorCode PCBDDCNedelecSupport(PC pc)
153: {
154: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
155: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
156: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
157: Vec tvec;
158: PetscSF sfv;
159: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
160: MPI_Comm comm;
161: IS lned,primals,allprimals,nedfieldlocal;
162: IS *eedges,*extrows,*extcols,*alleedges;
163: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
164: PetscScalar *vals,*work;
165: PetscReal *rwork;
166: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
167: PetscInt ne,nv,Lv,order,n,field;
168: PetscInt n_neigh,*neigh,*n_shared,**shared;
169: PetscInt i,j,extmem,cum,maxsize,nee;
170: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
171: PetscInt *sfvleaves,*sfvroots;
172: PetscInt *corners,*cedges;
173: PetscInt *ecount,**eneighs,*vcount,**vneighs;
174: #if defined(PETSC_USE_DEBUG)
175: PetscInt *emarks;
176: #endif
177: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
178: PetscErrorCode ierr;
181: /* If the discrete gradient is defined for a subset of dofs and global is true,
182: it assumes G is given in global ordering for all the dofs.
183: Otherwise, the ordering is global for the Nedelec field */
184: order = pcbddc->nedorder;
185: conforming = pcbddc->conforming;
186: field = pcbddc->nedfield;
187: global = pcbddc->nedglobal;
188: setprimal = PETSC_FALSE;
189: print = PETSC_FALSE;
190: singular = PETSC_FALSE;
192: /* Command line customization */
193: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
194: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
195: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
196: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
197: /* print debug info TODO: to be removed */
198: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
199: PetscOptionsEnd();
201: /* Return if there are no edges in the decomposition and the problem is not singular */
202: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
203: ISLocalToGlobalMappingGetSize(al2g,&n);
204: PetscObjectGetComm((PetscObject)pc,&comm);
205: if (!singular) {
206: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
207: lrc[0] = PETSC_FALSE;
208: for (i=0;i<n;i++) {
209: if (PetscRealPart(vals[i]) > 2.) {
210: lrc[0] = PETSC_TRUE;
211: break;
212: }
213: }
214: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
215: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
216: if (!lrc[1]) return(0);
217: }
219: /* Get Nedelec field */
220: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
221: if (pcbddc->n_ISForDofsLocal && field >= 0) {
222: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
223: nedfieldlocal = pcbddc->ISForDofsLocal[field];
224: ISGetLocalSize(nedfieldlocal,&ne);
225: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
226: ne = n;
227: nedfieldlocal = NULL;
228: global = PETSC_TRUE;
229: } else if (field == PETSC_DECIDE) {
230: PetscInt rst,ren,*idx;
232: PetscArrayzero(matis->sf_leafdata,n);
233: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
234: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
235: for (i=rst;i<ren;i++) {
236: PetscInt nc;
238: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
239: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
240: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241: }
242: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
243: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
244: PetscMalloc1(n,&idx);
245: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
246: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
247: } else {
248: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
249: }
251: /* Sanity checks */
252: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
253: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
254: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
256: /* Just set primal dofs and return */
257: if (setprimal) {
258: IS enedfieldlocal;
259: PetscInt *eidxs;
261: PetscMalloc1(ne,&eidxs);
262: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
263: if (nedfieldlocal) {
264: ISGetIndices(nedfieldlocal,&idxs);
265: for (i=0,cum=0;i<ne;i++) {
266: if (PetscRealPart(vals[idxs[i]]) > 2.) {
267: eidxs[cum++] = idxs[i];
268: }
269: }
270: ISRestoreIndices(nedfieldlocal,&idxs);
271: } else {
272: for (i=0,cum=0;i<ne;i++) {
273: if (PetscRealPart(vals[i]) > 2.) {
274: eidxs[cum++] = i;
275: }
276: }
277: }
278: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
279: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
280: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
281: PetscFree(eidxs);
282: ISDestroy(&nedfieldlocal);
283: ISDestroy(&enedfieldlocal);
284: return(0);
285: }
287: /* Compute some l2g maps */
288: if (nedfieldlocal) {
289: IS is;
291: /* need to map from the local Nedelec field to local numbering */
292: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
293: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
294: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
295: ISLocalToGlobalMappingCreateIS(is,&al2g);
296: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
297: if (global) {
298: PetscObjectReference((PetscObject)al2g);
299: el2g = al2g;
300: } else {
301: IS gis;
303: ISRenumber(is,NULL,NULL,&gis);
304: ISLocalToGlobalMappingCreateIS(gis,&el2g);
305: ISDestroy(&gis);
306: }
307: ISDestroy(&is);
308: } else {
309: /* restore default */
310: pcbddc->nedfield = -1;
311: /* one ref for the destruction of al2g, one for el2g */
312: PetscObjectReference((PetscObject)al2g);
313: PetscObjectReference((PetscObject)al2g);
314: el2g = al2g;
315: fl2g = NULL;
316: }
318: /* Start communication to drop connections for interior edges (for cc analysis only) */
319: PetscArrayzero(matis->sf_leafdata,n);
320: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
321: if (nedfieldlocal) {
322: ISGetIndices(nedfieldlocal,&idxs);
323: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
324: ISRestoreIndices(nedfieldlocal,&idxs);
325: } else {
326: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
327: }
328: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
329: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
332: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
333: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
334: if (global) {
335: PetscInt rst;
337: MatGetOwnershipRange(G,&rst,NULL);
338: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
339: if (matis->sf_rootdata[i] < 2) {
340: matis->sf_rootdata[cum++] = i + rst;
341: }
342: }
343: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
344: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
345: } else {
346: PetscInt *tbz;
348: PetscMalloc1(ne,&tbz);
349: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
350: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
351: ISGetIndices(nedfieldlocal,&idxs);
352: for (i=0,cum=0;i<ne;i++)
353: if (matis->sf_leafdata[idxs[i]] == 1)
354: tbz[cum++] = i;
355: ISRestoreIndices(nedfieldlocal,&idxs);
356: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
357: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
358: PetscFree(tbz);
359: }
360: } else { /* we need the entire G to infer the nullspace */
361: PetscObjectReference((PetscObject)pcbddc->discretegradient);
362: G = pcbddc->discretegradient;
363: }
365: /* Extract subdomain relevant rows of G */
366: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
367: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
368: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
369: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
370: ISDestroy(&lned);
371: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
372: MatDestroy(&lGall);
373: MatISGetLocalMat(lGis,&lG);
375: /* SF for nodal dofs communications */
376: MatGetLocalSize(G,NULL,&Lv);
377: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
378: PetscObjectReference((PetscObject)vl2g);
379: ISLocalToGlobalMappingGetSize(vl2g,&nv);
380: PetscSFCreate(comm,&sfv);
381: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
382: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
383: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
384: i = singular ? 2 : 1;
385: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
387: /* Destroy temporary G created in MATIS format and modified G */
388: PetscObjectReference((PetscObject)lG);
389: MatDestroy(&lGis);
390: MatDestroy(&G);
392: if (print) {
393: PetscObjectSetName((PetscObject)lG,"initial_lG");
394: MatView(lG,NULL);
395: }
397: /* Save lG for values insertion in change of basis */
398: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
400: /* Analyze the edge-nodes connections (duplicate lG) */
401: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
402: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
403: PetscBTCreate(nv,&btv);
404: PetscBTCreate(ne,&bte);
405: PetscBTCreate(ne,&btb);
406: PetscBTCreate(ne,&btbd);
407: PetscBTCreate(nv,&btvcand);
408: /* need to import the boundary specification to ensure the
409: proper detection of coarse edges' endpoints */
410: if (pcbddc->DirichletBoundariesLocal) {
411: IS is;
413: if (fl2g) {
414: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
415: } else {
416: is = pcbddc->DirichletBoundariesLocal;
417: }
418: ISGetLocalSize(is,&cum);
419: ISGetIndices(is,&idxs);
420: for (i=0;i<cum;i++) {
421: if (idxs[i] >= 0) {
422: PetscBTSet(btb,idxs[i]);
423: PetscBTSet(btbd,idxs[i]);
424: }
425: }
426: ISRestoreIndices(is,&idxs);
427: if (fl2g) {
428: ISDestroy(&is);
429: }
430: }
431: if (pcbddc->NeumannBoundariesLocal) {
432: IS is;
434: if (fl2g) {
435: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
436: } else {
437: is = pcbddc->NeumannBoundariesLocal;
438: }
439: ISGetLocalSize(is,&cum);
440: ISGetIndices(is,&idxs);
441: for (i=0;i<cum;i++) {
442: if (idxs[i] >= 0) {
443: PetscBTSet(btb,idxs[i]);
444: }
445: }
446: ISRestoreIndices(is,&idxs);
447: if (fl2g) {
448: ISDestroy(&is);
449: }
450: }
452: /* Count neighs per dof */
453: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
454: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
456: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
457: for proper detection of coarse edges' endpoints */
458: PetscBTCreate(ne,&btee);
459: for (i=0;i<ne;i++) {
460: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
461: PetscBTSet(btee,i);
462: }
463: }
464: PetscMalloc1(ne,&marks);
465: if (!conforming) {
466: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
467: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
468: }
469: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
470: MatSeqAIJGetArray(lGe,&vals);
471: cum = 0;
472: for (i=0;i<ne;i++) {
473: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
474: if (!PetscBTLookup(btee,i)) {
475: marks[cum++] = i;
476: continue;
477: }
478: /* set badly connected edge dofs as primal */
479: if (!conforming) {
480: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
481: marks[cum++] = i;
482: PetscBTSet(bte,i);
483: for (j=ii[i];j<ii[i+1];j++) {
484: PetscBTSet(btv,jj[j]);
485: }
486: } else {
487: /* every edge dofs should be connected trough a certain number of nodal dofs
488: to other edge dofs belonging to coarse edges
489: - at most 2 endpoints
490: - order-1 interior nodal dofs
491: - no undefined nodal dofs (nconn < order)
492: */
493: PetscInt ends = 0,ints = 0, undef = 0;
494: for (j=ii[i];j<ii[i+1];j++) {
495: PetscInt v = jj[j],k;
496: PetscInt nconn = iit[v+1]-iit[v];
497: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
498: if (nconn > order) ends++;
499: else if (nconn == order) ints++;
500: else undef++;
501: }
502: if (undef || ends > 2 || ints != order -1) {
503: marks[cum++] = i;
504: PetscBTSet(bte,i);
505: for (j=ii[i];j<ii[i+1];j++) {
506: PetscBTSet(btv,jj[j]);
507: }
508: }
509: }
510: }
511: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
512: if (!order && ii[i+1] != ii[i]) {
513: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
514: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
515: }
516: }
517: PetscBTDestroy(&btee);
518: MatSeqAIJRestoreArray(lGe,&vals);
519: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
520: if (!conforming) {
521: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
522: MatDestroy(&lGt);
523: }
524: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
526: /* identify splitpoints and corner candidates */
527: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
528: if (print) {
529: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
530: MatView(lGe,NULL);
531: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
532: MatView(lGt,NULL);
533: }
534: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
535: MatSeqAIJGetArray(lGt,&vals);
536: for (i=0;i<nv;i++) {
537: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
538: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
539: if (!order) { /* variable order */
540: PetscReal vorder = 0.;
542: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
543: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
544: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
545: ord = 1;
546: }
547: #if defined(PETSC_USE_DEBUG)
548: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
549: #endif
550: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
551: if (PetscBTLookup(btbd,jj[j])) {
552: bdir = PETSC_TRUE;
553: break;
554: }
555: if (vc != ecount[jj[j]]) {
556: sneighs = PETSC_FALSE;
557: } else {
558: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
559: for (k=0;k<vc;k++) {
560: if (vn[k] != en[k]) {
561: sneighs = PETSC_FALSE;
562: break;
563: }
564: }
565: }
566: }
567: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
568: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
569: PetscBTSet(btv,i);
570: } else if (test == ord) {
571: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
572: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
573: PetscBTSet(btv,i);
574: } else {
575: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
576: PetscBTSet(btvcand,i);
577: }
578: }
579: }
580: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
581: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
582: PetscBTDestroy(&btbd);
584: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
585: if (order != 1) {
586: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
587: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
588: for (i=0;i<nv;i++) {
589: if (PetscBTLookup(btvcand,i)) {
590: PetscBool found = PETSC_FALSE;
591: for (j=ii[i];j<ii[i+1] && !found;j++) {
592: PetscInt k,e = jj[j];
593: if (PetscBTLookup(bte,e)) continue;
594: for (k=iit[e];k<iit[e+1];k++) {
595: PetscInt v = jjt[k];
596: if (v != i && PetscBTLookup(btvcand,v)) {
597: found = PETSC_TRUE;
598: break;
599: }
600: }
601: }
602: if (!found) {
603: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
604: PetscBTClear(btvcand,i);
605: } else {
606: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
607: }
608: }
609: }
610: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
611: }
612: MatSeqAIJRestoreArray(lGt,&vals);
613: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
614: MatDestroy(&lGe);
616: /* Get the local G^T explicitly */
617: MatDestroy(&lGt);
618: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
619: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
621: /* Mark interior nodal dofs */
622: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
623: PetscBTCreate(nv,&btvi);
624: for (i=1;i<n_neigh;i++) {
625: for (j=0;j<n_shared[i];j++) {
626: PetscBTSet(btvi,shared[i][j]);
627: }
628: }
629: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
631: /* communicate corners and splitpoints */
632: PetscMalloc1(nv,&vmarks);
633: PetscArrayzero(sfvleaves,nv);
634: PetscArrayzero(sfvroots,Lv);
635: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
637: if (print) {
638: IS tbz;
640: cum = 0;
641: for (i=0;i<nv;i++)
642: if (sfvleaves[i])
643: vmarks[cum++] = i;
645: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
646: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
647: ISView(tbz,NULL);
648: ISDestroy(&tbz);
649: }
651: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
652: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
653: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
654: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
656: /* Zero rows of lGt corresponding to identified corners
657: and interior nodal dofs */
658: cum = 0;
659: for (i=0;i<nv;i++) {
660: if (sfvleaves[i]) {
661: vmarks[cum++] = i;
662: PetscBTSet(btv,i);
663: }
664: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
665: }
666: PetscBTDestroy(&btvi);
667: if (print) {
668: IS tbz;
670: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
671: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
672: ISView(tbz,NULL);
673: ISDestroy(&tbz);
674: }
675: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
676: PetscFree(vmarks);
677: PetscSFDestroy(&sfv);
678: PetscFree2(sfvleaves,sfvroots);
680: /* Recompute G */
681: MatDestroy(&lG);
682: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
683: if (print) {
684: PetscObjectSetName((PetscObject)lG,"used_lG");
685: MatView(lG,NULL);
686: PetscObjectSetName((PetscObject)lGt,"used_lGt");
687: MatView(lGt,NULL);
688: }
690: /* Get primal dofs (if any) */
691: cum = 0;
692: for (i=0;i<ne;i++) {
693: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
694: }
695: if (fl2g) {
696: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
697: }
698: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
699: if (print) {
700: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
701: ISView(primals,NULL);
702: }
703: PetscBTDestroy(&bte);
704: /* TODO: what if the user passed in some of them ? */
705: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
706: ISDestroy(&primals);
708: /* Compute edge connectivity */
709: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
711: /* Symbolic conn = lG*lGt */
712: MatProductCreate(lG,lGt,NULL,&conn);
713: MatProductSetType(conn,MATPRODUCT_AB);
714: MatProductSetAlgorithm(conn,"default");
715: MatProductSetFill(conn,PETSC_DEFAULT);
716: PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
717: MatProductSetFromOptions(conn);
718: MatProductSymbolic(conn);
720: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
721: if (fl2g) {
722: PetscBT btf;
723: PetscInt *iia,*jja,*iiu,*jju;
724: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
726: /* create CSR for all local dofs */
727: PetscMalloc1(n+1,&iia);
728: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
729: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
730: iiu = pcbddc->mat_graph->xadj;
731: jju = pcbddc->mat_graph->adjncy;
732: } else if (pcbddc->use_local_adj) {
733: rest = PETSC_TRUE;
734: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
735: } else {
736: free = PETSC_TRUE;
737: PetscMalloc2(n+1,&iiu,n,&jju);
738: iiu[0] = 0;
739: for (i=0;i<n;i++) {
740: iiu[i+1] = i+1;
741: jju[i] = -1;
742: }
743: }
745: /* import sizes of CSR */
746: iia[0] = 0;
747: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
749: /* overwrite entries corresponding to the Nedelec field */
750: PetscBTCreate(n,&btf);
751: ISGetIndices(nedfieldlocal,&idxs);
752: for (i=0;i<ne;i++) {
753: PetscBTSet(btf,idxs[i]);
754: iia[idxs[i]+1] = ii[i+1]-ii[i];
755: }
757: /* iia in CSR */
758: for (i=0;i<n;i++) iia[i+1] += iia[i];
760: /* jja in CSR */
761: PetscMalloc1(iia[n],&jja);
762: for (i=0;i<n;i++)
763: if (!PetscBTLookup(btf,i))
764: for (j=0;j<iiu[i+1]-iiu[i];j++)
765: jja[iia[i]+j] = jju[iiu[i]+j];
767: /* map edge dofs connectivity */
768: if (jj) {
769: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
770: for (i=0;i<ne;i++) {
771: PetscInt e = idxs[i];
772: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
773: }
774: }
775: ISRestoreIndices(nedfieldlocal,&idxs);
776: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
777: if (rest) {
778: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
779: }
780: if (free) {
781: PetscFree2(iiu,jju);
782: }
783: PetscBTDestroy(&btf);
784: } else {
785: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
786: }
788: /* Analyze interface for edge dofs */
789: PCBDDCAnalyzeInterface(pc);
790: pcbddc->mat_graph->twodim = PETSC_FALSE;
792: /* Get coarse edges in the edge space */
793: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
794: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
796: if (fl2g) {
797: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
798: PetscMalloc1(nee,&eedges);
799: for (i=0;i<nee;i++) {
800: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
801: }
802: } else {
803: eedges = alleedges;
804: primals = allprimals;
805: }
807: /* Mark fine edge dofs with their coarse edge id */
808: PetscArrayzero(marks,ne);
809: ISGetLocalSize(primals,&cum);
810: ISGetIndices(primals,&idxs);
811: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
812: ISRestoreIndices(primals,&idxs);
813: if (print) {
814: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
815: ISView(primals,NULL);
816: }
818: maxsize = 0;
819: for (i=0;i<nee;i++) {
820: PetscInt size,mark = i+1;
822: ISGetLocalSize(eedges[i],&size);
823: ISGetIndices(eedges[i],&idxs);
824: for (j=0;j<size;j++) marks[idxs[j]] = mark;
825: ISRestoreIndices(eedges[i],&idxs);
826: maxsize = PetscMax(maxsize,size);
827: }
829: /* Find coarse edge endpoints */
830: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
831: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
832: for (i=0;i<nee;i++) {
833: PetscInt mark = i+1,size;
835: ISGetLocalSize(eedges[i],&size);
836: if (!size && nedfieldlocal) continue;
837: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
838: ISGetIndices(eedges[i],&idxs);
839: if (print) {
840: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
841: ISView(eedges[i],NULL);
842: }
843: for (j=0;j<size;j++) {
844: PetscInt k, ee = idxs[j];
845: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
846: for (k=ii[ee];k<ii[ee+1];k++) {
847: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
848: if (PetscBTLookup(btv,jj[k])) {
849: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
850: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
851: PetscInt k2;
852: PetscBool corner = PETSC_FALSE;
853: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
854: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
855: /* it's a corner if either is connected with an edge dof belonging to a different cc or
856: if the edge dof lie on the natural part of the boundary */
857: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
858: corner = PETSC_TRUE;
859: break;
860: }
861: }
862: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
863: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
864: PetscBTSet(btv,jj[k]);
865: } else {
866: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
867: }
868: }
869: }
870: }
871: ISRestoreIndices(eedges[i],&idxs);
872: }
873: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
874: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
875: PetscBTDestroy(&btb);
877: /* Reset marked primal dofs */
878: ISGetLocalSize(primals,&cum);
879: ISGetIndices(primals,&idxs);
880: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
881: ISRestoreIndices(primals,&idxs);
883: /* Now use the initial lG */
884: MatDestroy(&lG);
885: MatDestroy(&lGt);
886: lG = lGinit;
887: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
889: /* Compute extended cols indices */
890: PetscBTCreate(nv,&btvc);
891: PetscBTCreate(nee,&bter);
892: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
893: MatSeqAIJGetMaxRowNonzeros(lG,&i);
894: i *= maxsize;
895: PetscCalloc1(nee,&extcols);
896: PetscMalloc2(i,&extrow,i,&gidxs);
897: eerr = PETSC_FALSE;
898: for (i=0;i<nee;i++) {
899: PetscInt size,found = 0;
901: cum = 0;
902: ISGetLocalSize(eedges[i],&size);
903: if (!size && nedfieldlocal) continue;
904: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
905: ISGetIndices(eedges[i],&idxs);
906: PetscBTMemzero(nv,btvc);
907: for (j=0;j<size;j++) {
908: PetscInt k,ee = idxs[j];
909: for (k=ii[ee];k<ii[ee+1];k++) {
910: PetscInt vv = jj[k];
911: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
912: else if (!PetscBTLookupSet(btvc,vv)) found++;
913: }
914: }
915: ISRestoreIndices(eedges[i],&idxs);
916: PetscSortRemoveDupsInt(&cum,extrow);
917: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
918: PetscSortIntWithArray(cum,gidxs,extrow);
919: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
920: /* it may happen that endpoints are not defined at this point
921: if it is the case, mark this edge for a second pass */
922: if (cum != size -1 || found != 2) {
923: PetscBTSet(bter,i);
924: if (print) {
925: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
926: ISView(eedges[i],NULL);
927: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
928: ISView(extcols[i],NULL);
929: }
930: eerr = PETSC_TRUE;
931: }
932: }
933: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
934: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
935: if (done) {
936: PetscInt *newprimals;
938: PetscMalloc1(ne,&newprimals);
939: ISGetLocalSize(primals,&cum);
940: ISGetIndices(primals,&idxs);
941: PetscArraycpy(newprimals,idxs,cum);
942: ISRestoreIndices(primals,&idxs);
943: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
944: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
945: for (i=0;i<nee;i++) {
946: PetscBool has_candidates = PETSC_FALSE;
947: if (PetscBTLookup(bter,i)) {
948: PetscInt size,mark = i+1;
950: ISGetLocalSize(eedges[i],&size);
951: ISGetIndices(eedges[i],&idxs);
952: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
953: for (j=0;j<size;j++) {
954: PetscInt k,ee = idxs[j];
955: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
956: for (k=ii[ee];k<ii[ee+1];k++) {
957: /* set all candidates located on the edge as corners */
958: if (PetscBTLookup(btvcand,jj[k])) {
959: PetscInt k2,vv = jj[k];
960: has_candidates = PETSC_TRUE;
961: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
962: PetscBTSet(btv,vv);
963: /* set all edge dofs connected to candidate as primals */
964: for (k2=iit[vv];k2<iit[vv+1];k2++) {
965: if (marks[jjt[k2]] == mark) {
966: PetscInt k3,ee2 = jjt[k2];
967: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
968: newprimals[cum++] = ee2;
969: /* finally set the new corners */
970: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
971: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
972: PetscBTSet(btv,jj[k3]);
973: }
974: }
975: }
976: } else {
977: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
978: }
979: }
980: }
981: if (!has_candidates) { /* circular edge */
982: PetscInt k, ee = idxs[0],*tmarks;
984: PetscCalloc1(ne,&tmarks);
985: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
986: for (k=ii[ee];k<ii[ee+1];k++) {
987: PetscInt k2;
988: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
989: PetscBTSet(btv,jj[k]);
990: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
991: }
992: for (j=0;j<size;j++) {
993: if (tmarks[idxs[j]] > 1) {
994: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
995: newprimals[cum++] = idxs[j];
996: }
997: }
998: PetscFree(tmarks);
999: }
1000: ISRestoreIndices(eedges[i],&idxs);
1001: }
1002: ISDestroy(&extcols[i]);
1003: }
1004: PetscFree(extcols);
1005: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1006: PetscSortRemoveDupsInt(&cum,newprimals);
1007: if (fl2g) {
1008: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1009: ISDestroy(&primals);
1010: for (i=0;i<nee;i++) {
1011: ISDestroy(&eedges[i]);
1012: }
1013: PetscFree(eedges);
1014: }
1015: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1016: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1017: PetscFree(newprimals);
1018: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1019: ISDestroy(&primals);
1020: PCBDDCAnalyzeInterface(pc);
1021: pcbddc->mat_graph->twodim = PETSC_FALSE;
1022: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1023: if (fl2g) {
1024: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1025: PetscMalloc1(nee,&eedges);
1026: for (i=0;i<nee;i++) {
1027: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1028: }
1029: } else {
1030: eedges = alleedges;
1031: primals = allprimals;
1032: }
1033: PetscCalloc1(nee,&extcols);
1035: /* Mark again */
1036: PetscArrayzero(marks,ne);
1037: for (i=0;i<nee;i++) {
1038: PetscInt size,mark = i+1;
1040: ISGetLocalSize(eedges[i],&size);
1041: ISGetIndices(eedges[i],&idxs);
1042: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1043: ISRestoreIndices(eedges[i],&idxs);
1044: }
1045: if (print) {
1046: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1047: ISView(primals,NULL);
1048: }
1050: /* Recompute extended cols */
1051: eerr = PETSC_FALSE;
1052: for (i=0;i<nee;i++) {
1053: PetscInt size;
1055: cum = 0;
1056: ISGetLocalSize(eedges[i],&size);
1057: if (!size && nedfieldlocal) continue;
1058: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1059: ISGetIndices(eedges[i],&idxs);
1060: for (j=0;j<size;j++) {
1061: PetscInt k,ee = idxs[j];
1062: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1063: }
1064: ISRestoreIndices(eedges[i],&idxs);
1065: PetscSortRemoveDupsInt(&cum,extrow);
1066: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1067: PetscSortIntWithArray(cum,gidxs,extrow);
1068: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1069: if (cum != size -1) {
1070: if (print) {
1071: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1072: ISView(eedges[i],NULL);
1073: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1074: ISView(extcols[i],NULL);
1075: }
1076: eerr = PETSC_TRUE;
1077: }
1078: }
1079: }
1080: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1081: PetscFree2(extrow,gidxs);
1082: PetscBTDestroy(&bter);
1083: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1084: /* an error should not occur at this point */
1085: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1087: /* Check the number of endpoints */
1088: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1089: PetscMalloc1(2*nee,&corners);
1090: PetscMalloc1(nee,&cedges);
1091: for (i=0;i<nee;i++) {
1092: PetscInt size, found = 0, gc[2];
1094: /* init with defaults */
1095: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1096: ISGetLocalSize(eedges[i],&size);
1097: if (!size && nedfieldlocal) continue;
1098: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1099: ISGetIndices(eedges[i],&idxs);
1100: PetscBTMemzero(nv,btvc);
1101: for (j=0;j<size;j++) {
1102: PetscInt k,ee = idxs[j];
1103: for (k=ii[ee];k<ii[ee+1];k++) {
1104: PetscInt vv = jj[k];
1105: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1106: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1107: corners[i*2+found++] = vv;
1108: }
1109: }
1110: }
1111: if (found != 2) {
1112: PetscInt e;
1113: if (fl2g) {
1114: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1115: } else {
1116: e = idxs[0];
1117: }
1118: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1119: }
1121: /* get primal dof index on this coarse edge */
1122: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1123: if (gc[0] > gc[1]) {
1124: PetscInt swap = corners[2*i];
1125: corners[2*i] = corners[2*i+1];
1126: corners[2*i+1] = swap;
1127: }
1128: cedges[i] = idxs[size-1];
1129: ISRestoreIndices(eedges[i],&idxs);
1130: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1131: }
1132: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1133: PetscBTDestroy(&btvc);
1135: #if defined(PETSC_USE_DEBUG)
1136: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1137: not interfere with neighbouring coarse edges */
1138: PetscMalloc1(nee+1,&emarks);
1139: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140: for (i=0;i<nv;i++) {
1141: PetscInt emax = 0,eemax = 0;
1143: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1144: PetscArrayzero(emarks,nee+1);
1145: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1146: for (j=1;j<nee+1;j++) {
1147: if (emax < emarks[j]) {
1148: emax = emarks[j];
1149: eemax = j;
1150: }
1151: }
1152: /* not relevant for edges */
1153: if (!eemax) continue;
1155: for (j=ii[i];j<ii[i+1];j++) {
1156: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1157: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1158: }
1159: }
1160: }
1161: PetscFree(emarks);
1162: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1163: #endif
1165: /* Compute extended rows indices for edge blocks of the change of basis */
1166: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1167: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1168: extmem *= maxsize;
1169: PetscMalloc1(extmem*nee,&extrow);
1170: PetscMalloc1(nee,&extrows);
1171: PetscCalloc1(nee,&extrowcum);
1172: for (i=0;i<nv;i++) {
1173: PetscInt mark = 0,size,start;
1175: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1176: for (j=ii[i];j<ii[i+1];j++)
1177: if (marks[jj[j]] && !mark)
1178: mark = marks[jj[j]];
1180: /* not relevant */
1181: if (!mark) continue;
1183: /* import extended row */
1184: mark--;
1185: start = mark*extmem+extrowcum[mark];
1186: size = ii[i+1]-ii[i];
1187: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1188: PetscArraycpy(extrow+start,jj+ii[i],size);
1189: extrowcum[mark] += size;
1190: }
1191: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1192: MatDestroy(&lGt);
1193: PetscFree(marks);
1195: /* Compress extrows */
1196: cum = 0;
1197: for (i=0;i<nee;i++) {
1198: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1199: PetscSortRemoveDupsInt(&size,start);
1200: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1201: cum = PetscMax(cum,size);
1202: }
1203: PetscFree(extrowcum);
1204: PetscBTDestroy(&btv);
1205: PetscBTDestroy(&btvcand);
1207: /* Workspace for lapack inner calls and VecSetValues */
1208: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1210: /* Create change of basis matrix (preallocation can be improved) */
1211: MatCreate(comm,&T);
1212: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1213: pc->pmat->rmap->N,pc->pmat->rmap->N);
1214: MatSetType(T,MATAIJ);
1215: MatSeqAIJSetPreallocation(T,10,NULL);
1216: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1217: MatSetLocalToGlobalMapping(T,al2g,al2g);
1218: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1219: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1220: ISLocalToGlobalMappingDestroy(&al2g);
1222: /* Defaults to identity */
1223: MatCreateVecs(pc->pmat,&tvec,NULL);
1224: VecSet(tvec,1.0);
1225: MatDiagonalSet(T,tvec,INSERT_VALUES);
1226: VecDestroy(&tvec);
1228: /* Create discrete gradient for the coarser level if needed */
1229: MatDestroy(&pcbddc->nedcG);
1230: ISDestroy(&pcbddc->nedclocal);
1231: if (pcbddc->current_level < pcbddc->max_levels) {
1232: ISLocalToGlobalMapping cel2g,cvl2g;
1233: IS wis,gwis;
1234: PetscInt cnv,cne;
1236: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1237: if (fl2g) {
1238: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1239: } else {
1240: PetscObjectReference((PetscObject)wis);
1241: pcbddc->nedclocal = wis;
1242: }
1243: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1244: ISDestroy(&wis);
1245: ISRenumber(gwis,NULL,&cne,&wis);
1246: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1247: ISDestroy(&wis);
1248: ISDestroy(&gwis);
1250: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1251: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1252: ISDestroy(&wis);
1253: ISRenumber(gwis,NULL,&cnv,&wis);
1254: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1255: ISDestroy(&wis);
1256: ISDestroy(&gwis);
1258: MatCreate(comm,&pcbddc->nedcG);
1259: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1260: MatSetType(pcbddc->nedcG,MATAIJ);
1261: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1262: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1263: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1264: ISLocalToGlobalMappingDestroy(&cel2g);
1265: ISLocalToGlobalMappingDestroy(&cvl2g);
1266: }
1267: ISLocalToGlobalMappingDestroy(&vl2g);
1269: #if defined(PRINT_GDET)
1270: inc = 0;
1271: lev = pcbddc->current_level;
1272: #endif
1274: /* Insert values in the change of basis matrix */
1275: for (i=0;i<nee;i++) {
1276: Mat Gins = NULL, GKins = NULL;
1277: IS cornersis = NULL;
1278: PetscScalar cvals[2];
1280: if (pcbddc->nedcG) {
1281: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1282: }
1283: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1284: if (Gins && GKins) {
1285: const PetscScalar *data;
1286: const PetscInt *rows,*cols;
1287: PetscInt nrh,nch,nrc,ncc;
1289: ISGetIndices(eedges[i],&cols);
1290: /* H1 */
1291: ISGetIndices(extrows[i],&rows);
1292: MatGetSize(Gins,&nrh,&nch);
1293: MatDenseGetArrayRead(Gins,&data);
1294: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1295: MatDenseRestoreArrayRead(Gins,&data);
1296: ISRestoreIndices(extrows[i],&rows);
1297: /* complement */
1298: MatGetSize(GKins,&nrc,&ncc);
1299: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1300: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1301: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1302: MatDenseGetArrayRead(GKins,&data);
1303: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1304: MatDenseRestoreArrayRead(GKins,&data);
1306: /* coarse discrete gradient */
1307: if (pcbddc->nedcG) {
1308: PetscInt cols[2];
1310: cols[0] = 2*i;
1311: cols[1] = 2*i+1;
1312: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1313: }
1314: ISRestoreIndices(eedges[i],&cols);
1315: }
1316: ISDestroy(&extrows[i]);
1317: ISDestroy(&extcols[i]);
1318: ISDestroy(&cornersis);
1319: MatDestroy(&Gins);
1320: MatDestroy(&GKins);
1321: }
1322: ISLocalToGlobalMappingDestroy(&el2g);
1324: /* Start assembling */
1325: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1326: if (pcbddc->nedcG) {
1327: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1328: }
1330: /* Free */
1331: if (fl2g) {
1332: ISDestroy(&primals);
1333: for (i=0;i<nee;i++) {
1334: ISDestroy(&eedges[i]);
1335: }
1336: PetscFree(eedges);
1337: }
1339: /* hack mat_graph with primal dofs on the coarse edges */
1340: {
1341: PCBDDCGraph graph = pcbddc->mat_graph;
1342: PetscInt *oqueue = graph->queue;
1343: PetscInt *ocptr = graph->cptr;
1344: PetscInt ncc,*idxs;
1346: /* find first primal edge */
1347: if (pcbddc->nedclocal) {
1348: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1349: } else {
1350: if (fl2g) {
1351: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1352: }
1353: idxs = cedges;
1354: }
1355: cum = 0;
1356: while (cum < nee && cedges[cum] < 0) cum++;
1358: /* adapt connected components */
1359: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1360: graph->cptr[0] = 0;
1361: for (i=0,ncc=0;i<graph->ncc;i++) {
1362: PetscInt lc = ocptr[i+1]-ocptr[i];
1363: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1364: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1365: graph->queue[graph->cptr[ncc]] = cedges[cum];
1366: ncc++;
1367: lc--;
1368: cum++;
1369: while (cum < nee && cedges[cum] < 0) cum++;
1370: }
1371: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1372: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1373: ncc++;
1374: }
1375: graph->ncc = ncc;
1376: if (pcbddc->nedclocal) {
1377: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1378: }
1379: PetscFree2(ocptr,oqueue);
1380: }
1381: ISLocalToGlobalMappingDestroy(&fl2g);
1382: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1383: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1384: MatDestroy(&conn);
1386: ISDestroy(&nedfieldlocal);
1387: PetscFree(extrow);
1388: PetscFree2(work,rwork);
1389: PetscFree(corners);
1390: PetscFree(cedges);
1391: PetscFree(extrows);
1392: PetscFree(extcols);
1393: MatDestroy(&lG);
1395: /* Complete assembling */
1396: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1397: if (pcbddc->nedcG) {
1398: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1399: #if 0
1400: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1401: MatView(pcbddc->nedcG,NULL);
1402: #endif
1403: }
1405: /* set change of basis */
1406: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1407: MatDestroy(&T);
1409: return(0);
1410: }
1412: /* the near-null space of BDDC carries information on quadrature weights,
1413: and these can be collinear -> so cheat with MatNullSpaceCreate
1414: and create a suitable set of basis vectors first */
1415: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1416: {
1418: PetscInt i;
1421: for (i=0;i<nvecs;i++) {
1422: PetscInt first,last;
1424: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1425: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1426: if (i>=first && i < last) {
1427: PetscScalar *data;
1428: VecGetArray(quad_vecs[i],&data);
1429: if (!has_const) {
1430: data[i-first] = 1.;
1431: } else {
1432: data[2*i-first] = 1./PetscSqrtReal(2.);
1433: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1434: }
1435: VecRestoreArray(quad_vecs[i],&data);
1436: }
1437: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1438: }
1439: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1440: for (i=0;i<nvecs;i++) { /* reset vectors */
1441: PetscInt first,last;
1442: VecLockReadPop(quad_vecs[i]);
1443: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1444: if (i>=first && i < last) {
1445: PetscScalar *data;
1446: VecGetArray(quad_vecs[i],&data);
1447: if (!has_const) {
1448: data[i-first] = 0.;
1449: } else {
1450: data[2*i-first] = 0.;
1451: data[2*i-first+1] = 0.;
1452: }
1453: VecRestoreArray(quad_vecs[i],&data);
1454: }
1455: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1456: VecLockReadPush(quad_vecs[i]);
1457: }
1458: return(0);
1459: }
1461: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1462: {
1463: Mat loc_divudotp;
1464: Vec p,v,vins,quad_vec,*quad_vecs;
1465: ISLocalToGlobalMapping map;
1466: PetscScalar *vals;
1467: const PetscScalar *array;
1468: PetscInt i,maxneighs,maxsize,*gidxs;
1469: PetscInt n_neigh,*neigh,*n_shared,**shared;
1470: PetscMPIInt rank;
1471: PetscErrorCode ierr;
1474: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1475: MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1476: if (!maxneighs) {
1477: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1478: *nnsp = NULL;
1479: return(0);
1480: }
1481: maxsize = 0;
1482: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1483: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1484: /* create vectors to hold quadrature weights */
1485: MatCreateVecs(A,&quad_vec,NULL);
1486: if (!transpose) {
1487: MatGetLocalToGlobalMapping(A,&map,NULL);
1488: } else {
1489: MatGetLocalToGlobalMapping(A,NULL,&map);
1490: }
1491: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1492: VecDestroy(&quad_vec);
1493: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1494: for (i=0;i<maxneighs;i++) {
1495: VecLockReadPop(quad_vecs[i]);
1496: }
1498: /* compute local quad vec */
1499: MatISGetLocalMat(divudotp,&loc_divudotp);
1500: if (!transpose) {
1501: MatCreateVecs(loc_divudotp,&v,&p);
1502: } else {
1503: MatCreateVecs(loc_divudotp,&p,&v);
1504: }
1505: VecSet(p,1.);
1506: if (!transpose) {
1507: MatMultTranspose(loc_divudotp,p,v);
1508: } else {
1509: MatMult(loc_divudotp,p,v);
1510: }
1511: if (vl2l) {
1512: Mat lA;
1513: VecScatter sc;
1515: MatISGetLocalMat(A,&lA);
1516: MatCreateVecs(lA,&vins,NULL);
1517: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1518: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1519: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1520: VecScatterDestroy(&sc);
1521: } else {
1522: vins = v;
1523: }
1524: VecGetArrayRead(vins,&array);
1525: VecDestroy(&p);
1527: /* insert in global quadrature vecs */
1528: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1529: for (i=0;i<n_neigh;i++) {
1530: const PetscInt *idxs;
1531: PetscInt idx,nn,j;
1533: idxs = shared[i];
1534: nn = n_shared[i];
1535: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1536: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1537: idx = -(idx+1);
1538: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1539: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1540: }
1541: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1542: VecRestoreArrayRead(vins,&array);
1543: if (vl2l) {
1544: VecDestroy(&vins);
1545: }
1546: VecDestroy(&v);
1547: PetscFree2(gidxs,vals);
1549: /* assemble near null space */
1550: for (i=0;i<maxneighs;i++) {
1551: VecAssemblyBegin(quad_vecs[i]);
1552: }
1553: for (i=0;i<maxneighs;i++) {
1554: VecAssemblyEnd(quad_vecs[i]);
1555: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1556: VecLockReadPush(quad_vecs[i]);
1557: }
1558: VecDestroyVecs(maxneighs,&quad_vecs);
1559: return(0);
1560: }
1562: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1563: {
1564: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1568: if (primalv) {
1569: if (pcbddc->user_primal_vertices_local) {
1570: IS list[2], newp;
1572: list[0] = primalv;
1573: list[1] = pcbddc->user_primal_vertices_local;
1574: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1575: ISSortRemoveDups(newp);
1576: ISDestroy(&list[1]);
1577: pcbddc->user_primal_vertices_local = newp;
1578: } else {
1579: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1580: }
1581: }
1582: return(0);
1583: }
1585: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1586: {
1587: PetscInt f, *comp = (PetscInt *)ctx;
1590: for (f=0;f<Nf;f++) out[f] = X[*comp];
1591: return(0);
1592: }
1594: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1595: {
1597: Vec local,global;
1598: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1599: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1600: PetscBool monolithic = PETSC_FALSE;
1603: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1604: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1605: PetscOptionsEnd();
1606: /* need to convert from global to local topology information and remove references to information in global ordering */
1607: MatCreateVecs(pc->pmat,&global,NULL);
1608: MatCreateVecs(matis->A,&local,NULL);
1609: VecBindToCPU(global,PETSC_TRUE);
1610: VecBindToCPU(local,PETSC_TRUE);
1611: if (monolithic) { /* just get block size to properly compute vertices */
1612: if (pcbddc->vertex_size == 1) {
1613: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1614: }
1615: goto boundary;
1616: }
1618: if (pcbddc->user_provided_isfordofs) {
1619: if (pcbddc->n_ISForDofs) {
1620: PetscInt i;
1622: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1623: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1624: PetscInt bs;
1626: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1627: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1628: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1629: ISDestroy(&pcbddc->ISForDofs[i]);
1630: }
1631: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1632: pcbddc->n_ISForDofs = 0;
1633: PetscFree(pcbddc->ISForDofs);
1634: }
1635: } else {
1636: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1637: DM dm;
1639: MatGetDM(pc->pmat, &dm);
1640: if (!dm) {
1641: PCGetDM(pc, &dm);
1642: }
1643: if (dm) {
1644: IS *fields;
1645: PetscInt nf,i;
1647: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1648: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1649: for (i=0;i<nf;i++) {
1650: PetscInt bs;
1652: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1653: ISGetBlockSize(fields[i],&bs);
1654: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1655: ISDestroy(&fields[i]);
1656: }
1657: PetscFree(fields);
1658: pcbddc->n_ISForDofsLocal = nf;
1659: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1660: PetscContainer c;
1662: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1663: if (c) {
1664: MatISLocalFields lf;
1665: PetscContainerGetPointer(c,(void**)&lf);
1666: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1667: } else { /* fallback, create the default fields if bs > 1 */
1668: PetscInt i, n = matis->A->rmap->n;
1669: MatGetBlockSize(pc->pmat,&i);
1670: if (i > 1) {
1671: pcbddc->n_ISForDofsLocal = i;
1672: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1673: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1674: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1675: }
1676: }
1677: }
1678: }
1679: } else {
1680: PetscInt i;
1681: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1682: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1683: }
1684: }
1685: }
1687: boundary:
1688: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1689: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1690: } else if (pcbddc->DirichletBoundariesLocal) {
1691: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1692: }
1693: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1694: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1695: } else if (pcbddc->NeumannBoundariesLocal) {
1696: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1697: }
1698: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1699: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1700: }
1701: VecDestroy(&global);
1702: VecDestroy(&local);
1703: /* detect local disconnected subdomains if requested (use matis->A) */
1704: if (pcbddc->detect_disconnected) {
1705: IS primalv = NULL;
1706: PetscInt i;
1707: PetscBool filter = pcbddc->detect_disconnected_filter;
1709: for (i=0;i<pcbddc->n_local_subs;i++) {
1710: ISDestroy(&pcbddc->local_subs[i]);
1711: }
1712: PetscFree(pcbddc->local_subs);
1713: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1714: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1715: ISDestroy(&primalv);
1716: }
1717: /* early stage corner detection */
1718: {
1719: DM dm;
1721: MatGetDM(pc->pmat,&dm);
1722: if (!dm) {
1723: PCGetDM(pc,&dm);
1724: }
1725: if (dm) {
1726: PetscBool isda;
1728: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1729: if (isda) {
1730: ISLocalToGlobalMapping l2l;
1731: IS corners;
1732: Mat lA;
1733: PetscBool gl,lo;
1735: {
1736: Vec cvec;
1737: const PetscScalar *coords;
1738: PetscInt dof,n,cdim;
1739: PetscBool memc = PETSC_TRUE;
1741: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1742: DMGetCoordinates(dm,&cvec);
1743: VecGetLocalSize(cvec,&n);
1744: VecGetBlockSize(cvec,&cdim);
1745: n /= cdim;
1746: PetscFree(pcbddc->mat_graph->coords);
1747: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1748: VecGetArrayRead(cvec,&coords);
1749: #if defined(PETSC_USE_COMPLEX)
1750: memc = PETSC_FALSE;
1751: #endif
1752: if (dof != 1) memc = PETSC_FALSE;
1753: if (memc) {
1754: PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1755: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1756: PetscReal *bcoords = pcbddc->mat_graph->coords;
1757: PetscInt i, b, d;
1759: for (i=0;i<n;i++) {
1760: for (b=0;b<dof;b++) {
1761: for (d=0;d<cdim;d++) {
1762: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1763: }
1764: }
1765: }
1766: }
1767: VecRestoreArrayRead(cvec,&coords);
1768: pcbddc->mat_graph->cdim = cdim;
1769: pcbddc->mat_graph->cnloc = dof*n;
1770: pcbddc->mat_graph->cloc = PETSC_FALSE;
1771: }
1772: DMDAGetSubdomainCornersIS(dm,&corners);
1773: MatISGetLocalMat(pc->pmat,&lA);
1774: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1775: MatISRestoreLocalMat(pc->pmat,&lA);
1776: lo = (PetscBool)(l2l && corners);
1777: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1778: if (gl) { /* From PETSc's DMDA */
1779: const PetscInt *idx;
1780: PetscInt dof,bs,*idxout,n;
1782: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1783: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1784: ISGetLocalSize(corners,&n);
1785: ISGetIndices(corners,&idx);
1786: if (bs == dof) {
1787: PetscMalloc1(n,&idxout);
1788: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1789: } else { /* the original DMDA local-to-local map have been modified */
1790: PetscInt i,d;
1792: PetscMalloc1(dof*n,&idxout);
1793: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1794: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1796: bs = 1;
1797: n *= dof;
1798: }
1799: ISRestoreIndices(corners,&idx);
1800: DMDARestoreSubdomainCornersIS(dm,&corners);
1801: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1802: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1803: ISDestroy(&corners);
1804: pcbddc->corner_selected = PETSC_TRUE;
1805: pcbddc->corner_selection = PETSC_TRUE;
1806: }
1807: if (corners) {
1808: DMDARestoreSubdomainCornersIS(dm,&corners);
1809: }
1810: }
1811: }
1812: }
1813: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1814: DM dm;
1816: MatGetDM(pc->pmat,&dm);
1817: if (!dm) {
1818: PCGetDM(pc,&dm);
1819: }
1820: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1821: Vec vcoords;
1822: PetscSection section;
1823: PetscReal *coords;
1824: PetscInt d,cdim,nl,nf,**ctxs;
1825: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1827: DMGetCoordinateDim(dm,&cdim);
1828: DMGetLocalSection(dm,§ion);
1829: PetscSectionGetNumFields(section,&nf);
1830: DMCreateGlobalVector(dm,&vcoords);
1831: VecGetLocalSize(vcoords,&nl);
1832: PetscMalloc1(nl*cdim,&coords);
1833: PetscMalloc2(nf,&funcs,nf,&ctxs);
1834: PetscMalloc1(nf,&ctxs[0]);
1835: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1836: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1837: for (d=0;d<cdim;d++) {
1838: PetscInt i;
1839: const PetscScalar *v;
1841: for (i=0;i<nf;i++) ctxs[i][0] = d;
1842: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1843: VecGetArrayRead(vcoords,&v);
1844: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1845: VecRestoreArrayRead(vcoords,&v);
1846: }
1847: VecDestroy(&vcoords);
1848: PCSetCoordinates(pc,cdim,nl,coords);
1849: PetscFree(coords);
1850: PetscFree(ctxs[0]);
1851: PetscFree2(funcs,ctxs);
1852: }
1853: }
1854: return(0);
1855: }
1857: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1858: {
1859: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1860: PetscErrorCode ierr;
1861: IS nis;
1862: const PetscInt *idxs;
1863: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1864: PetscBool *ld;
1867: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1868: if (mop == MPI_LAND) {
1869: /* init rootdata with true */
1870: ld = (PetscBool*) matis->sf_rootdata;
1871: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1872: } else {
1873: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1874: }
1875: PetscArrayzero(matis->sf_leafdata,n);
1876: ISGetLocalSize(*is,&nd);
1877: ISGetIndices(*is,&idxs);
1878: ld = (PetscBool*) matis->sf_leafdata;
1879: for (i=0;i<nd;i++)
1880: if (-1 < idxs[i] && idxs[i] < n)
1881: ld[idxs[i]] = PETSC_TRUE;
1882: ISRestoreIndices(*is,&idxs);
1883: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1884: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1885: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1886: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1887: if (mop == MPI_LAND) {
1888: PetscMalloc1(nd,&nidxs);
1889: } else {
1890: PetscMalloc1(n,&nidxs);
1891: }
1892: for (i=0,nnd=0;i<n;i++)
1893: if (ld[i])
1894: nidxs[nnd++] = i;
1895: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1896: ISDestroy(is);
1897: *is = nis;
1898: return(0);
1899: }
1901: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1902: {
1903: PC_IS *pcis = (PC_IS*)(pc->data);
1904: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1905: PetscErrorCode ierr;
1908: if (!pcbddc->benign_have_null) {
1909: return(0);
1910: }
1911: if (pcbddc->ChangeOfBasisMatrix) {
1912: Vec swap;
1914: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1915: swap = pcbddc->work_change;
1916: pcbddc->work_change = r;
1917: r = swap;
1918: }
1919: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1920: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1921: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1922: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1923: VecSet(z,0.);
1924: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1925: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1926: if (pcbddc->ChangeOfBasisMatrix) {
1927: pcbddc->work_change = r;
1928: VecCopy(z,pcbddc->work_change);
1929: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1930: }
1931: return(0);
1932: }
1934: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1935: {
1936: PCBDDCBenignMatMult_ctx ctx;
1937: PetscErrorCode ierr;
1938: PetscBool apply_right,apply_left,reset_x;
1941: MatShellGetContext(A,&ctx);
1942: if (transpose) {
1943: apply_right = ctx->apply_left;
1944: apply_left = ctx->apply_right;
1945: } else {
1946: apply_right = ctx->apply_right;
1947: apply_left = ctx->apply_left;
1948: }
1949: reset_x = PETSC_FALSE;
1950: if (apply_right) {
1951: const PetscScalar *ax;
1952: PetscInt nl,i;
1954: VecGetLocalSize(x,&nl);
1955: VecGetArrayRead(x,&ax);
1956: PetscArraycpy(ctx->work,ax,nl);
1957: VecRestoreArrayRead(x,&ax);
1958: for (i=0;i<ctx->benign_n;i++) {
1959: PetscScalar sum,val;
1960: const PetscInt *idxs;
1961: PetscInt nz,j;
1962: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1963: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1964: sum = 0.;
1965: if (ctx->apply_p0) {
1966: val = ctx->work[idxs[nz-1]];
1967: for (j=0;j<nz-1;j++) {
1968: sum += ctx->work[idxs[j]];
1969: ctx->work[idxs[j]] += val;
1970: }
1971: } else {
1972: for (j=0;j<nz-1;j++) {
1973: sum += ctx->work[idxs[j]];
1974: }
1975: }
1976: ctx->work[idxs[nz-1]] -= sum;
1977: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1978: }
1979: VecPlaceArray(x,ctx->work);
1980: reset_x = PETSC_TRUE;
1981: }
1982: if (transpose) {
1983: MatMultTranspose(ctx->A,x,y);
1984: } else {
1985: MatMult(ctx->A,x,y);
1986: }
1987: if (reset_x) {
1988: VecResetArray(x);
1989: }
1990: if (apply_left) {
1991: PetscScalar *ay;
1992: PetscInt i;
1994: VecGetArray(y,&ay);
1995: for (i=0;i<ctx->benign_n;i++) {
1996: PetscScalar sum,val;
1997: const PetscInt *idxs;
1998: PetscInt nz,j;
1999: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
2000: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
2001: val = -ay[idxs[nz-1]];
2002: if (ctx->apply_p0) {
2003: sum = 0.;
2004: for (j=0;j<nz-1;j++) {
2005: sum += ay[idxs[j]];
2006: ay[idxs[j]] += val;
2007: }
2008: ay[idxs[nz-1]] += sum;
2009: } else {
2010: for (j=0;j<nz-1;j++) {
2011: ay[idxs[j]] += val;
2012: }
2013: ay[idxs[nz-1]] = 0.;
2014: }
2015: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2016: }
2017: VecRestoreArray(y,&ay);
2018: }
2019: return(0);
2020: }
2022: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2023: {
2027: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2028: return(0);
2029: }
2031: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2032: {
2036: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2037: return(0);
2038: }
2040: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2041: {
2042: PC_IS *pcis = (PC_IS*)pc->data;
2043: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2044: PCBDDCBenignMatMult_ctx ctx;
2045: PetscErrorCode ierr;
2048: if (!restore) {
2049: Mat A_IB,A_BI;
2050: PetscScalar *work;
2051: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2053: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2054: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2055: PetscMalloc1(pcis->n,&work);
2056: MatCreate(PETSC_COMM_SELF,&A_IB);
2057: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2058: MatSetType(A_IB,MATSHELL);
2059: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2060: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2061: PetscNew(&ctx);
2062: MatShellSetContext(A_IB,ctx);
2063: ctx->apply_left = PETSC_TRUE;
2064: ctx->apply_right = PETSC_FALSE;
2065: ctx->apply_p0 = PETSC_FALSE;
2066: ctx->benign_n = pcbddc->benign_n;
2067: if (reuse) {
2068: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2069: ctx->free = PETSC_FALSE;
2070: } else { /* TODO: could be optimized for successive solves */
2071: ISLocalToGlobalMapping N_to_D;
2072: PetscInt i;
2074: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2075: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2076: for (i=0;i<pcbddc->benign_n;i++) {
2077: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2078: }
2079: ISLocalToGlobalMappingDestroy(&N_to_D);
2080: ctx->free = PETSC_TRUE;
2081: }
2082: ctx->A = pcis->A_IB;
2083: ctx->work = work;
2084: MatSetUp(A_IB);
2085: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2086: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2087: pcis->A_IB = A_IB;
2089: /* A_BI as A_IB^T */
2090: MatCreateTranspose(A_IB,&A_BI);
2091: pcbddc->benign_original_mat = pcis->A_BI;
2092: pcis->A_BI = A_BI;
2093: } else {
2094: if (!pcbddc->benign_original_mat) {
2095: return(0);
2096: }
2097: MatShellGetContext(pcis->A_IB,&ctx);
2098: MatDestroy(&pcis->A_IB);
2099: pcis->A_IB = ctx->A;
2100: ctx->A = NULL;
2101: MatDestroy(&pcis->A_BI);
2102: pcis->A_BI = pcbddc->benign_original_mat;
2103: pcbddc->benign_original_mat = NULL;
2104: if (ctx->free) {
2105: PetscInt i;
2106: for (i=0;i<ctx->benign_n;i++) {
2107: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2108: }
2109: PetscFree(ctx->benign_zerodiag_subs);
2110: }
2111: PetscFree(ctx->work);
2112: PetscFree(ctx);
2113: }
2114: return(0);
2115: }
2117: /* used just in bddc debug mode */
2118: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2119: {
2120: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2121: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2122: Mat An;
2126: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2127: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2128: if (is1) {
2129: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2130: MatDestroy(&An);
2131: } else {
2132: *B = An;
2133: }
2134: return(0);
2135: }
2137: /* TODO: add reuse flag */
2138: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2139: {
2140: Mat Bt;
2141: PetscScalar *a,*bdata;
2142: const PetscInt *ii,*ij;
2143: PetscInt m,n,i,nnz,*bii,*bij;
2144: PetscBool flg_row;
2148: MatGetSize(A,&n,&m);
2149: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2150: MatSeqAIJGetArray(A,&a);
2151: nnz = n;
2152: for (i=0;i<ii[n];i++) {
2153: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2154: }
2155: PetscMalloc1(n+1,&bii);
2156: PetscMalloc1(nnz,&bij);
2157: PetscMalloc1(nnz,&bdata);
2158: nnz = 0;
2159: bii[0] = 0;
2160: for (i=0;i<n;i++) {
2161: PetscInt j;
2162: for (j=ii[i];j<ii[i+1];j++) {
2163: PetscScalar entry = a[j];
2164: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2165: bij[nnz] = ij[j];
2166: bdata[nnz] = entry;
2167: nnz++;
2168: }
2169: }
2170: bii[i+1] = nnz;
2171: }
2172: MatSeqAIJRestoreArray(A,&a);
2173: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2174: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2175: {
2176: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2177: b->free_a = PETSC_TRUE;
2178: b->free_ij = PETSC_TRUE;
2179: }
2180: if (*B == A) {
2181: MatDestroy(&A);
2182: }
2183: *B = Bt;
2184: return(0);
2185: }
2187: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2188: {
2189: Mat B = NULL;
2190: DM dm;
2191: IS is_dummy,*cc_n;
2192: ISLocalToGlobalMapping l2gmap_dummy;
2193: PCBDDCGraph graph;
2194: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2195: PetscInt i,n;
2196: PetscInt *xadj,*adjncy;
2197: PetscBool isplex = PETSC_FALSE;
2198: PetscErrorCode ierr;
2201: if (ncc) *ncc = 0;
2202: if (cc) *cc = NULL;
2203: if (primalv) *primalv = NULL;
2204: PCBDDCGraphCreate(&graph);
2205: MatGetDM(pc->pmat,&dm);
2206: if (!dm) {
2207: PCGetDM(pc,&dm);
2208: }
2209: if (dm) {
2210: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2211: }
2212: if (filter) isplex = PETSC_FALSE;
2214: if (isplex) { /* this code has been modified from plexpartition.c */
2215: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2216: PetscInt *adj = NULL;
2217: IS cellNumbering;
2218: const PetscInt *cellNum;
2219: PetscBool useCone, useClosure;
2220: PetscSection section;
2221: PetscSegBuffer adjBuffer;
2222: PetscSF sfPoint;
2226: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2227: DMGetPointSF(dm, &sfPoint);
2228: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2229: /* Build adjacency graph via a section/segbuffer */
2230: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2231: PetscSectionSetChart(section, pStart, pEnd);
2232: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2233: /* Always use FVM adjacency to create partitioner graph */
2234: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2235: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2236: DMPlexGetCellNumbering(dm, &cellNumbering);
2237: ISGetIndices(cellNumbering, &cellNum);
2238: for (n = 0, p = pStart; p < pEnd; p++) {
2239: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2240: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2241: adjSize = PETSC_DETERMINE;
2242: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2243: for (a = 0; a < adjSize; ++a) {
2244: const PetscInt point = adj[a];
2245: if (pStart <= point && point < pEnd) {
2246: PetscInt *PETSC_RESTRICT pBuf;
2247: PetscSectionAddDof(section, p, 1);
2248: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2249: *pBuf = point;
2250: }
2251: }
2252: n++;
2253: }
2254: DMSetBasicAdjacency(dm, useCone, useClosure);
2255: /* Derive CSR graph from section/segbuffer */
2256: PetscSectionSetUp(section);
2257: PetscSectionGetStorageSize(section, &size);
2258: PetscMalloc1(n+1, &xadj);
2259: for (idx = 0, p = pStart; p < pEnd; p++) {
2260: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2261: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2262: }
2263: xadj[n] = size;
2264: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2265: /* Clean up */
2266: PetscSegBufferDestroy(&adjBuffer);
2267: PetscSectionDestroy(§ion);
2268: PetscFree(adj);
2269: graph->xadj = xadj;
2270: graph->adjncy = adjncy;
2271: } else {
2272: Mat A;
2273: PetscBool isseqaij, flg_row;
2275: MatISGetLocalMat(pc->pmat,&A);
2276: if (!A->rmap->N || !A->cmap->N) {
2277: PCBDDCGraphDestroy(&graph);
2278: return(0);
2279: }
2280: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2281: if (!isseqaij && filter) {
2282: PetscBool isseqdense;
2284: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2285: if (!isseqdense) {
2286: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2287: } else { /* TODO: rectangular case and LDA */
2288: PetscScalar *array;
2289: PetscReal chop=1.e-6;
2291: MatDuplicate(A,MAT_COPY_VALUES,&B);
2292: MatDenseGetArray(B,&array);
2293: MatGetSize(B,&n,NULL);
2294: for (i=0;i<n;i++) {
2295: PetscInt j;
2296: for (j=i+1;j<n;j++) {
2297: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2298: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2299: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2300: }
2301: }
2302: MatDenseRestoreArray(B,&array);
2303: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2304: }
2305: } else {
2306: PetscObjectReference((PetscObject)A);
2307: B = A;
2308: }
2309: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2311: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2312: if (filter) {
2313: PetscScalar *data;
2314: PetscInt j,cum;
2316: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2317: MatSeqAIJGetArray(B,&data);
2318: cum = 0;
2319: for (i=0;i<n;i++) {
2320: PetscInt t;
2322: for (j=xadj[i];j<xadj[i+1];j++) {
2323: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2324: continue;
2325: }
2326: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2327: }
2328: t = xadj_filtered[i];
2329: xadj_filtered[i] = cum;
2330: cum += t;
2331: }
2332: MatSeqAIJRestoreArray(B,&data);
2333: graph->xadj = xadj_filtered;
2334: graph->adjncy = adjncy_filtered;
2335: } else {
2336: graph->xadj = xadj;
2337: graph->adjncy = adjncy;
2338: }
2339: }
2340: /* compute local connected components using PCBDDCGraph */
2341: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2342: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2343: ISDestroy(&is_dummy);
2344: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2345: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2346: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2347: PCBDDCGraphComputeConnectedComponents(graph);
2349: /* partial clean up */
2350: PetscFree2(xadj_filtered,adjncy_filtered);
2351: if (B) {
2352: PetscBool flg_row;
2353: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2354: MatDestroy(&B);
2355: }
2356: if (isplex) {
2357: PetscFree(xadj);
2358: PetscFree(adjncy);
2359: }
2361: /* get back data */
2362: if (isplex) {
2363: if (ncc) *ncc = graph->ncc;
2364: if (cc || primalv) {
2365: Mat A;
2366: PetscBT btv,btvt;
2367: PetscSection subSection;
2368: PetscInt *ids,cum,cump,*cids,*pids;
2370: DMPlexGetSubdomainSection(dm,&subSection);
2371: MatISGetLocalMat(pc->pmat,&A);
2372: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2373: PetscBTCreate(A->rmap->n,&btv);
2374: PetscBTCreate(A->rmap->n,&btvt);
2376: cids[0] = 0;
2377: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2378: PetscInt j;
2380: PetscBTMemzero(A->rmap->n,btvt);
2381: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2382: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2384: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2385: for (k = 0; k < 2*size; k += 2) {
2386: PetscInt s, pp, p = closure[k], off, dof, cdof;
2388: PetscSectionGetConstraintDof(subSection,p,&cdof);
2389: PetscSectionGetOffset(subSection,p,&off);
2390: PetscSectionGetDof(subSection,p,&dof);
2391: for (s = 0; s < dof-cdof; s++) {
2392: if (PetscBTLookupSet(btvt,off+s)) continue;
2393: if (!PetscBTLookup(btv,off+s)) {
2394: ids[cum++] = off+s;
2395: } else { /* cross-vertex */
2396: pids[cump++] = off+s;
2397: }
2398: }
2399: DMPlexGetTreeParent(dm,p,&pp,NULL);
2400: if (pp != p) {
2401: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2402: PetscSectionGetOffset(subSection,pp,&off);
2403: PetscSectionGetDof(subSection,pp,&dof);
2404: for (s = 0; s < dof-cdof; s++) {
2405: if (PetscBTLookupSet(btvt,off+s)) continue;
2406: if (!PetscBTLookup(btv,off+s)) {
2407: ids[cum++] = off+s;
2408: } else { /* cross-vertex */
2409: pids[cump++] = off+s;
2410: }
2411: }
2412: }
2413: }
2414: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2415: }
2416: cids[i+1] = cum;
2417: /* mark dofs as already assigned */
2418: for (j = cids[i]; j < cids[i+1]; j++) {
2419: PetscBTSet(btv,ids[j]);
2420: }
2421: }
2422: if (cc) {
2423: PetscMalloc1(graph->ncc,&cc_n);
2424: for (i = 0; i < graph->ncc; i++) {
2425: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2426: }
2427: *cc = cc_n;
2428: }
2429: if (primalv) {
2430: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2431: }
2432: PetscFree3(ids,cids,pids);
2433: PetscBTDestroy(&btv);
2434: PetscBTDestroy(&btvt);
2435: }
2436: } else {
2437: if (ncc) *ncc = graph->ncc;
2438: if (cc) {
2439: PetscMalloc1(graph->ncc,&cc_n);
2440: for (i=0;i<graph->ncc;i++) {
2441: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2442: }
2443: *cc = cc_n;
2444: }
2445: }
2446: /* clean up graph */
2447: graph->xadj = 0;
2448: graph->adjncy = 0;
2449: PCBDDCGraphDestroy(&graph);
2450: return(0);
2451: }
2453: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2454: {
2455: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2456: PC_IS* pcis = (PC_IS*)(pc->data);
2457: IS dirIS = NULL;
2458: PetscInt i;
2462: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2463: if (zerodiag) {
2464: Mat A;
2465: Vec vec3_N;
2466: PetscScalar *vals;
2467: const PetscInt *idxs;
2468: PetscInt nz,*count;
2470: /* p0 */
2471: VecSet(pcis->vec1_N,0.);
2472: PetscMalloc1(pcis->n,&vals);
2473: ISGetLocalSize(zerodiag,&nz);
2474: ISGetIndices(zerodiag,&idxs);
2475: for (i=0;i<nz;i++) vals[i] = 1.;
2476: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2477: VecAssemblyBegin(pcis->vec1_N);
2478: VecAssemblyEnd(pcis->vec1_N);
2479: /* v_I */
2480: VecSetRandom(pcis->vec2_N,NULL);
2481: for (i=0;i<nz;i++) vals[i] = 0.;
2482: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2483: ISRestoreIndices(zerodiag,&idxs);
2484: ISGetIndices(pcis->is_B_local,&idxs);
2485: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2486: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2487: ISRestoreIndices(pcis->is_B_local,&idxs);
2488: if (dirIS) {
2489: PetscInt n;
2491: ISGetLocalSize(dirIS,&n);
2492: ISGetIndices(dirIS,&idxs);
2493: for (i=0;i<n;i++) vals[i] = 0.;
2494: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2495: ISRestoreIndices(dirIS,&idxs);
2496: }
2497: VecAssemblyBegin(pcis->vec2_N);
2498: VecAssemblyEnd(pcis->vec2_N);
2499: VecDuplicate(pcis->vec1_N,&vec3_N);
2500: VecSet(vec3_N,0.);
2501: MatISGetLocalMat(pc->pmat,&A);
2502: MatMult(A,pcis->vec1_N,vec3_N);
2503: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2504: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2505: PetscFree(vals);
2506: VecDestroy(&vec3_N);
2508: /* there should not be any pressure dofs lying on the interface */
2509: PetscCalloc1(pcis->n,&count);
2510: ISGetIndices(pcis->is_B_local,&idxs);
2511: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2512: ISRestoreIndices(pcis->is_B_local,&idxs);
2513: ISGetIndices(zerodiag,&idxs);
2514: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2515: ISRestoreIndices(zerodiag,&idxs);
2516: PetscFree(count);
2517: }
2518: ISDestroy(&dirIS);
2520: /* check PCBDDCBenignGetOrSetP0 */
2521: VecSetRandom(pcis->vec1_global,NULL);
2522: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2523: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2524: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2525: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2526: for (i=0;i<pcbddc->benign_n;i++) {
2527: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2528: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2529: }
2530: return(0);
2531: }
2533: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2534: {
2535: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2536: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2537: PetscInt nz,n,benign_n,bsp = 1;
2538: PetscInt *interior_dofs,n_interior_dofs,nneu;
2539: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2543: if (reuse) goto project_b0;
2544: PetscSFDestroy(&pcbddc->benign_sf);
2545: MatDestroy(&pcbddc->benign_B0);
2546: for (n=0;n<pcbddc->benign_n;n++) {
2547: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2548: }
2549: PetscFree(pcbddc->benign_zerodiag_subs);
2550: has_null_pressures = PETSC_TRUE;
2551: have_null = PETSC_TRUE;
2552: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2553: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2554: Checks if all the pressure dofs in each subdomain have a zero diagonal
2555: If not, a change of basis on pressures is not needed
2556: since the local Schur complements are already SPD
2557: */
2558: if (pcbddc->n_ISForDofsLocal) {
2559: IS iP = NULL;
2560: PetscInt p,*pp;
2561: PetscBool flg;
2563: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2564: n = pcbddc->n_ISForDofsLocal;
2565: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2566: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2567: PetscOptionsEnd();
2568: if (!flg) {
2569: n = 1;
2570: pp[0] = pcbddc->n_ISForDofsLocal-1;
2571: }
2573: bsp = 0;
2574: for (p=0;p<n;p++) {
2575: PetscInt bs;
2577: if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2578: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2579: bsp += bs;
2580: }
2581: PetscMalloc1(bsp,&bzerodiag);
2582: bsp = 0;
2583: for (p=0;p<n;p++) {
2584: const PetscInt *idxs;
2585: PetscInt b,bs,npl,*bidxs;
2587: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2588: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2589: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2590: PetscMalloc1(npl/bs,&bidxs);
2591: for (b=0;b<bs;b++) {
2592: PetscInt i;
2594: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2595: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2596: bsp++;
2597: }
2598: PetscFree(bidxs);
2599: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2600: }
2601: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2603: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2604: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2605: if (iP) {
2606: IS newpressures;
2608: ISDifference(pressures,iP,&newpressures);
2609: ISDestroy(&pressures);
2610: pressures = newpressures;
2611: }
2612: ISSorted(pressures,&sorted);
2613: if (!sorted) {
2614: ISSort(pressures);
2615: }
2616: PetscFree(pp);
2617: }
2619: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2620: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2621: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2622: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2623: ISSorted(zerodiag,&sorted);
2624: if (!sorted) {
2625: ISSort(zerodiag);
2626: }
2627: PetscObjectReference((PetscObject)zerodiag);
2628: zerodiag_save = zerodiag;
2629: ISGetLocalSize(zerodiag,&nz);
2630: if (!nz) {
2631: if (n) have_null = PETSC_FALSE;
2632: has_null_pressures = PETSC_FALSE;
2633: ISDestroy(&zerodiag);
2634: }
2635: recompute_zerodiag = PETSC_FALSE;
2637: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2638: zerodiag_subs = NULL;
2639: benign_n = 0;
2640: n_interior_dofs = 0;
2641: interior_dofs = NULL;
2642: nneu = 0;
2643: if (pcbddc->NeumannBoundariesLocal) {
2644: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2645: }
2646: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2647: if (checkb) { /* need to compute interior nodes */
2648: PetscInt n,i,j;
2649: PetscInt n_neigh,*neigh,*n_shared,**shared;
2650: PetscInt *iwork;
2652: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2653: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2654: PetscCalloc1(n,&iwork);
2655: PetscMalloc1(n,&interior_dofs);
2656: for (i=1;i<n_neigh;i++)
2657: for (j=0;j<n_shared[i];j++)
2658: iwork[shared[i][j]] += 1;
2659: for (i=0;i<n;i++)
2660: if (!iwork[i])
2661: interior_dofs[n_interior_dofs++] = i;
2662: PetscFree(iwork);
2663: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2664: }
2665: if (has_null_pressures) {
2666: IS *subs;
2667: PetscInt nsubs,i,j,nl;
2668: const PetscInt *idxs;
2669: PetscScalar *array;
2670: Vec *work;
2671: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2673: subs = pcbddc->local_subs;
2674: nsubs = pcbddc->n_local_subs;
2675: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2676: if (checkb) {
2677: VecDuplicateVecs(matis->y,2,&work);
2678: ISGetLocalSize(zerodiag,&nl);
2679: ISGetIndices(zerodiag,&idxs);
2680: /* work[0] = 1_p */
2681: VecSet(work[0],0.);
2682: VecGetArray(work[0],&array);
2683: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2684: VecRestoreArray(work[0],&array);
2685: /* work[1] = 1_v */
2686: VecSet(work[1],1.);
2687: VecGetArray(work[1],&array);
2688: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2689: VecRestoreArray(work[1],&array);
2690: ISRestoreIndices(zerodiag,&idxs);
2691: }
2693: if (nsubs > 1 || bsp > 1) {
2694: IS *is;
2695: PetscInt b,totb;
2697: totb = bsp;
2698: is = bsp > 1 ? bzerodiag : &zerodiag;
2699: nsubs = PetscMax(nsubs,1);
2700: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2701: for (b=0;b<totb;b++) {
2702: for (i=0;i<nsubs;i++) {
2703: ISLocalToGlobalMapping l2g;
2704: IS t_zerodiag_subs;
2705: PetscInt nl;
2707: if (subs) {
2708: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2709: } else {
2710: IS tis;
2712: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2713: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2714: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2715: ISDestroy(&tis);
2716: }
2717: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2718: ISGetLocalSize(t_zerodiag_subs,&nl);
2719: if (nl) {
2720: PetscBool valid = PETSC_TRUE;
2722: if (checkb) {
2723: VecSet(matis->x,0);
2724: ISGetLocalSize(subs[i],&nl);
2725: ISGetIndices(subs[i],&idxs);
2726: VecGetArray(matis->x,&array);
2727: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2728: VecRestoreArray(matis->x,&array);
2729: ISRestoreIndices(subs[i],&idxs);
2730: VecPointwiseMult(matis->x,work[0],matis->x);
2731: MatMult(matis->A,matis->x,matis->y);
2732: VecPointwiseMult(matis->y,work[1],matis->y);
2733: VecGetArray(matis->y,&array);
2734: for (j=0;j<n_interior_dofs;j++) {
2735: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2736: valid = PETSC_FALSE;
2737: break;
2738: }
2739: }
2740: VecRestoreArray(matis->y,&array);
2741: }
2742: if (valid && nneu) {
2743: const PetscInt *idxs;
2744: PetscInt nzb;
2746: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2747: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2748: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2749: if (nzb) valid = PETSC_FALSE;
2750: }
2751: if (valid && pressures) {
2752: IS t_pressure_subs,tmp;
2753: PetscInt i1,i2;
2755: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2756: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2757: ISGetLocalSize(tmp,&i1);
2758: ISGetLocalSize(t_zerodiag_subs,&i2);
2759: if (i2 != i1) valid = PETSC_FALSE;
2760: ISDestroy(&t_pressure_subs);
2761: ISDestroy(&tmp);
2762: }
2763: if (valid) {
2764: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2765: benign_n++;
2766: } else recompute_zerodiag = PETSC_TRUE;
2767: }
2768: ISDestroy(&t_zerodiag_subs);
2769: ISLocalToGlobalMappingDestroy(&l2g);
2770: }
2771: }
2772: } else { /* there's just one subdomain (or zero if they have not been detected */
2773: PetscBool valid = PETSC_TRUE;
2775: if (nneu) valid = PETSC_FALSE;
2776: if (valid && pressures) {
2777: ISEqual(pressures,zerodiag,&valid);
2778: }
2779: if (valid && checkb) {
2780: MatMult(matis->A,work[0],matis->x);
2781: VecPointwiseMult(matis->x,work[1],matis->x);
2782: VecGetArray(matis->x,&array);
2783: for (j=0;j<n_interior_dofs;j++) {
2784: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2785: valid = PETSC_FALSE;
2786: break;
2787: }
2788: }
2789: VecRestoreArray(matis->x,&array);
2790: }
2791: if (valid) {
2792: benign_n = 1;
2793: PetscMalloc1(benign_n,&zerodiag_subs);
2794: PetscObjectReference((PetscObject)zerodiag);
2795: zerodiag_subs[0] = zerodiag;
2796: }
2797: }
2798: if (checkb) {
2799: VecDestroyVecs(2,&work);
2800: }
2801: }
2802: PetscFree(interior_dofs);
2804: if (!benign_n) {
2805: PetscInt n;
2807: ISDestroy(&zerodiag);
2808: recompute_zerodiag = PETSC_FALSE;
2809: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2810: if (n) have_null = PETSC_FALSE;
2811: }
2813: /* final check for null pressures */
2814: if (zerodiag && pressures) {
2815: ISEqual(pressures,zerodiag,&have_null);
2816: }
2818: if (recompute_zerodiag) {
2819: ISDestroy(&zerodiag);
2820: if (benign_n == 1) {
2821: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2822: zerodiag = zerodiag_subs[0];
2823: } else {
2824: PetscInt i,nzn,*new_idxs;
2826: nzn = 0;
2827: for (i=0;i<benign_n;i++) {
2828: PetscInt ns;
2829: ISGetLocalSize(zerodiag_subs[i],&ns);
2830: nzn += ns;
2831: }
2832: PetscMalloc1(nzn,&new_idxs);
2833: nzn = 0;
2834: for (i=0;i<benign_n;i++) {
2835: PetscInt ns,*idxs;
2836: ISGetLocalSize(zerodiag_subs[i],&ns);
2837: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2838: PetscArraycpy(new_idxs+nzn,idxs,ns);
2839: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2840: nzn += ns;
2841: }
2842: PetscSortInt(nzn,new_idxs);
2843: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2844: }
2845: have_null = PETSC_FALSE;
2846: }
2848: /* determines if the coarse solver will be singular or not */
2849: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2851: /* Prepare matrix to compute no-net-flux */
2852: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2853: Mat A,loc_divudotp;
2854: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2855: IS row,col,isused = NULL;
2856: PetscInt M,N,n,st,n_isused;
2858: if (pressures) {
2859: isused = pressures;
2860: } else {
2861: isused = zerodiag_save;
2862: }
2863: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2864: MatISGetLocalMat(pc->pmat,&A);
2865: MatGetLocalSize(A,&n,NULL);
2866: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2867: n_isused = 0;
2868: if (isused) {
2869: ISGetLocalSize(isused,&n_isused);
2870: }
2871: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2872: st = st-n_isused;
2873: if (n) {
2874: const PetscInt *gidxs;
2876: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2877: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2878: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2879: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2880: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2881: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2882: } else {
2883: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2884: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2885: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2886: }
2887: MatGetSize(pc->pmat,NULL,&N);
2888: ISGetSize(row,&M);
2889: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2890: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2891: ISDestroy(&row);
2892: ISDestroy(&col);
2893: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2894: MatSetType(pcbddc->divudotp,MATIS);
2895: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2896: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2897: ISLocalToGlobalMappingDestroy(&rl2g);
2898: ISLocalToGlobalMappingDestroy(&cl2g);
2899: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2900: MatDestroy(&loc_divudotp);
2901: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2902: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2903: }
2904: ISDestroy(&zerodiag_save);
2905: ISDestroy(&pressures);
2906: if (bzerodiag) {
2907: PetscInt i;
2909: for (i=0;i<bsp;i++) {
2910: ISDestroy(&bzerodiag[i]);
2911: }
2912: PetscFree(bzerodiag);
2913: }
2914: pcbddc->benign_n = benign_n;
2915: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2917: /* determines if the problem has subdomains with 0 pressure block */
2918: have_null = (PetscBool)(!!pcbddc->benign_n);
2919: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2921: project_b0:
2922: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2923: /* change of basis and p0 dofs */
2924: if (pcbddc->benign_n) {
2925: PetscInt i,s,*nnz;
2927: /* local change of basis for pressures */
2928: MatDestroy(&pcbddc->benign_change);
2929: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2930: MatSetType(pcbddc->benign_change,MATAIJ);
2931: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2932: PetscMalloc1(n,&nnz);
2933: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2934: for (i=0;i<pcbddc->benign_n;i++) {
2935: const PetscInt *idxs;
2936: PetscInt nzs,j;
2938: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2939: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2940: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2941: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2942: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2943: }
2944: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2945: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2946: PetscFree(nnz);
2947: /* set identity by default */
2948: for (i=0;i<n;i++) {
2949: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2950: }
2951: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2952: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2953: /* set change on pressures */
2954: for (s=0;s<pcbddc->benign_n;s++) {
2955: PetscScalar *array;
2956: const PetscInt *idxs;
2957: PetscInt nzs;
2959: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2960: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2961: for (i=0;i<nzs-1;i++) {
2962: PetscScalar vals[2];
2963: PetscInt cols[2];
2965: cols[0] = idxs[i];
2966: cols[1] = idxs[nzs-1];
2967: vals[0] = 1.;
2968: vals[1] = 1.;
2969: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2970: }
2971: PetscMalloc1(nzs,&array);
2972: for (i=0;i<nzs-1;i++) array[i] = -1.;
2973: array[nzs-1] = 1.;
2974: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2975: /* store local idxs for p0 */
2976: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2977: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2978: PetscFree(array);
2979: }
2980: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2981: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2983: /* project if needed */
2984: if (pcbddc->benign_change_explicit) {
2985: Mat M;
2987: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2988: MatDestroy(&pcbddc->local_mat);
2989: MatSeqAIJCompress(M,&pcbddc->local_mat);
2990: MatDestroy(&M);
2991: }
2992: /* store global idxs for p0 */
2993: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2994: }
2995: *zerodiaglocal = zerodiag;
2996: return(0);
2997: }
2999: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3000: {
3001: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3002: PetscScalar *array;
3006: if (!pcbddc->benign_sf) {
3007: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3008: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3009: }
3010: if (get) {
3011: VecGetArrayRead(v,(const PetscScalar**)&array);
3012: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3013: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3014: VecRestoreArrayRead(v,(const PetscScalar**)&array);
3015: } else {
3016: VecGetArray(v,&array);
3017: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3018: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3019: VecRestoreArray(v,&array);
3020: }
3021: return(0);
3022: }
3024: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3025: {
3026: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3030: /* TODO: add error checking
3031: - avoid nested pop (or push) calls.
3032: - cannot push before pop.
3033: - cannot call this if pcbddc->local_mat is NULL
3034: */
3035: if (!pcbddc->benign_n) {
3036: return(0);
3037: }
3038: if (pop) {
3039: if (pcbddc->benign_change_explicit) {
3040: IS is_p0;
3041: MatReuse reuse;
3043: /* extract B_0 */
3044: reuse = MAT_INITIAL_MATRIX;
3045: if (pcbddc->benign_B0) {
3046: reuse = MAT_REUSE_MATRIX;
3047: }
3048: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3049: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3050: /* remove rows and cols from local problem */
3051: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3052: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3053: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3054: ISDestroy(&is_p0);
3055: } else {
3056: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3057: PetscScalar *vals;
3058: PetscInt i,n,*idxs_ins;
3060: VecGetLocalSize(matis->y,&n);
3061: PetscMalloc2(n,&idxs_ins,n,&vals);
3062: if (!pcbddc->benign_B0) {
3063: PetscInt *nnz;
3064: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3065: MatSetType(pcbddc->benign_B0,MATAIJ);
3066: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3067: PetscMalloc1(pcbddc->benign_n,&nnz);
3068: for (i=0;i<pcbddc->benign_n;i++) {
3069: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3070: nnz[i] = n - nnz[i];
3071: }
3072: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3073: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3074: PetscFree(nnz);
3075: }
3077: for (i=0;i<pcbddc->benign_n;i++) {
3078: PetscScalar *array;
3079: PetscInt *idxs,j,nz,cum;
3081: VecSet(matis->x,0.);
3082: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3083: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3084: for (j=0;j<nz;j++) vals[j] = 1.;
3085: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3086: VecAssemblyBegin(matis->x);
3087: VecAssemblyEnd(matis->x);
3088: VecSet(matis->y,0.);
3089: MatMult(matis->A,matis->x,matis->y);
3090: VecGetArray(matis->y,&array);
3091: cum = 0;
3092: for (j=0;j<n;j++) {
3093: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3094: vals[cum] = array[j];
3095: idxs_ins[cum] = j;
3096: cum++;
3097: }
3098: }
3099: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3100: VecRestoreArray(matis->y,&array);
3101: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3102: }
3103: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3104: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3105: PetscFree2(idxs_ins,vals);
3106: }
3107: } else { /* push */
3108: if (pcbddc->benign_change_explicit) {
3109: PetscInt i;
3111: for (i=0;i<pcbddc->benign_n;i++) {
3112: PetscScalar *B0_vals;
3113: PetscInt *B0_cols,B0_ncol;
3115: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3116: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3117: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3118: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3119: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3120: }
3121: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3122: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3123: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3124: }
3125: return(0);
3126: }
3128: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3129: {
3130: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3131: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3132: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3133: PetscBLASInt *B_iwork,*B_ifail;
3134: PetscScalar *work,lwork;
3135: PetscScalar *St,*S,*eigv;
3136: PetscScalar *Sarray,*Starray;
3137: PetscReal *eigs,thresh,lthresh,uthresh;
3138: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3139: PetscBool allocated_S_St;
3140: #if defined(PETSC_USE_COMPLEX)
3141: PetscReal *rwork;
3142: #endif
3143: PetscErrorCode ierr;
3146: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3147: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3148: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3149: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3151: if (pcbddc->dbg_flag) {
3152: PetscViewerFlush(pcbddc->dbg_viewer);
3153: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3154: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3155: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3156: }
3158: if (pcbddc->dbg_flag) {
3159: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3160: }
3162: /* max size of subsets */
3163: mss = 0;
3164: for (i=0;i<sub_schurs->n_subs;i++) {
3165: PetscInt subset_size;
3167: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3168: mss = PetscMax(mss,subset_size);
3169: }
3171: /* min/max and threshold */
3172: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3173: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3174: nmax = PetscMax(nmin,nmax);
3175: allocated_S_St = PETSC_FALSE;
3176: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3177: allocated_S_St = PETSC_TRUE;
3178: }
3180: /* allocate lapack workspace */
3181: cum = cum2 = 0;
3182: maxneigs = 0;
3183: for (i=0;i<sub_schurs->n_subs;i++) {
3184: PetscInt n,subset_size;
3186: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3187: n = PetscMin(subset_size,nmax);
3188: cum += subset_size;
3189: cum2 += subset_size*n;
3190: maxneigs = PetscMax(maxneigs,n);
3191: }
3192: lwork = 0;
3193: if (mss) {
3194: if (sub_schurs->is_symmetric) {
3195: PetscScalar sdummy = 0.;
3196: PetscBLASInt B_itype = 1;
3197: PetscBLASInt B_N = mss, idummy = 0;
3198: PetscReal rdummy = 0.,zero = 0.0;
3199: PetscReal eps = 0.0; /* dlamch? */
3201: B_lwork = -1;
3202: /* some implementations may complain about NULL pointers, even if we are querying */
3203: S = &sdummy;
3204: St = &sdummy;
3205: eigs = &rdummy;
3206: eigv = &sdummy;
3207: B_iwork = &idummy;
3208: B_ifail = &idummy;
3209: #if defined(PETSC_USE_COMPLEX)
3210: rwork = &rdummy;
3211: #endif
3212: thresh = 1.0;
3213: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3214: #if defined(PETSC_USE_COMPLEX)
3215: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3216: #else
3217: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3218: #endif
3219: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3220: PetscFPTrapPop();
3221: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3222: }
3224: nv = 0;
3225: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3226: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3227: }
3228: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3229: if (allocated_S_St) {
3230: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3231: }
3232: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3233: #if defined(PETSC_USE_COMPLEX)
3234: PetscMalloc1(7*mss,&rwork);
3235: #endif
3236: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3237: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3238: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3239: nv+cum,&pcbddc->adaptive_constraints_idxs,
3240: nv+cum2,&pcbddc->adaptive_constraints_data);
3241: PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);
3243: maxneigs = 0;
3244: cum = cumarray = 0;
3245: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3246: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3247: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3248: const PetscInt *idxs;
3250: ISGetIndices(sub_schurs->is_vertices,&idxs);
3251: for (cum=0;cum<nv;cum++) {
3252: pcbddc->adaptive_constraints_n[cum] = 1;
3253: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3254: pcbddc->adaptive_constraints_data[cum] = 1.0;
3255: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3256: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3257: }
3258: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3259: }
3261: if (mss) { /* multilevel */
3262: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3263: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3264: }
3266: lthresh = pcbddc->adaptive_threshold[0];
3267: uthresh = pcbddc->adaptive_threshold[1];
3268: for (i=0;i<sub_schurs->n_subs;i++) {
3269: const PetscInt *idxs;
3270: PetscReal upper,lower;
3271: PetscInt j,subset_size,eigs_start = 0;
3272: PetscBLASInt B_N;
3273: PetscBool same_data = PETSC_FALSE;
3274: PetscBool scal = PETSC_FALSE;
3276: if (pcbddc->use_deluxe_scaling) {
3277: upper = PETSC_MAX_REAL;
3278: lower = uthresh;
3279: } else {
3280: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3281: upper = 1./uthresh;
3282: lower = 0.;
3283: }
3284: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3285: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3286: PetscBLASIntCast(subset_size,&B_N);
3287: /* this is experimental: we assume the dofs have been properly grouped to have
3288: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3289: if (!sub_schurs->is_posdef) {
3290: Mat T;
3292: for (j=0;j<subset_size;j++) {
3293: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3294: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3295: MatScale(T,-1.0);
3296: MatDestroy(&T);
3297: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3298: MatScale(T,-1.0);
3299: MatDestroy(&T);
3300: if (sub_schurs->change_primal_sub) {
3301: PetscInt nz,k;
3302: const PetscInt *idxs;
3304: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3305: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3306: for (k=0;k<nz;k++) {
3307: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3308: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3309: }
3310: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3311: }
3312: scal = PETSC_TRUE;
3313: break;
3314: }
3315: }
3316: }
3318: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3319: if (sub_schurs->is_symmetric) {
3320: PetscInt j,k;
3321: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3322: PetscArrayzero(S,subset_size*subset_size);
3323: PetscArrayzero(St,subset_size*subset_size);
3324: }
3325: for (j=0;j<subset_size;j++) {
3326: for (k=j;k<subset_size;k++) {
3327: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3328: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3329: }
3330: }
3331: } else {
3332: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3333: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3334: }
3335: } else {
3336: S = Sarray + cumarray;
3337: St = Starray + cumarray;
3338: }
3339: /* see if we can save some work */
3340: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3341: PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3342: }
3344: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3345: B_neigs = 0;
3346: } else {
3347: if (sub_schurs->is_symmetric) {
3348: PetscBLASInt B_itype = 1;
3349: PetscBLASInt B_IL, B_IU;
3350: PetscReal eps = -1.0; /* dlamch? */
3351: PetscInt nmin_s;
3352: PetscBool compute_range;
3354: B_neigs = 0;
3355: compute_range = (PetscBool)!same_data;
3356: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3358: if (pcbddc->dbg_flag) {
3359: PetscInt nc = 0;
3361: if (sub_schurs->change_primal_sub) {
3362: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3363: }
3364: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3365: }
3367: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3368: if (compute_range) {
3370: /* ask for eigenvalues larger than thresh */
3371: if (sub_schurs->is_posdef) {
3372: #if defined(PETSC_USE_COMPLEX)
3373: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3374: #else
3375: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3376: #endif
3377: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3378: } else { /* no theory so far, but it works nicely */
3379: PetscInt recipe = 0,recipe_m = 1;
3380: PetscReal bb[2];
3382: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3383: switch (recipe) {
3384: case 0:
3385: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3386: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3387: #if defined(PETSC_USE_COMPLEX)
3388: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3389: #else
3390: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3391: #endif
3392: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3393: break;
3394: case 1:
3395: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3396: #if defined(PETSC_USE_COMPLEX)
3397: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3398: #else
3399: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3400: #endif
3401: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3402: if (!scal) {
3403: PetscBLASInt B_neigs2 = 0;
3405: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3406: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3407: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3408: #if defined(PETSC_USE_COMPLEX)
3409: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3410: #else
3411: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3412: #endif
3413: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3414: B_neigs += B_neigs2;
3415: }
3416: break;
3417: case 2:
3418: if (scal) {
3419: bb[0] = PETSC_MIN_REAL;
3420: bb[1] = 0;
3421: #if defined(PETSC_USE_COMPLEX)
3422: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3423: #else
3424: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3425: #endif
3426: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3427: } else {
3428: PetscBLASInt B_neigs2 = 0;
3429: PetscBool import = PETSC_FALSE;
3431: lthresh = PetscMax(lthresh,0.0);
3432: if (lthresh > 0.0) {
3433: bb[0] = PETSC_MIN_REAL;
3434: bb[1] = lthresh*lthresh;
3436: import = PETSC_TRUE;
3437: #if defined(PETSC_USE_COMPLEX)
3438: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3439: #else
3440: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3441: #endif
3442: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3443: }
3444: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3445: bb[1] = PETSC_MAX_REAL;
3446: if (import) {
3447: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3448: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3449: }
3450: #if defined(PETSC_USE_COMPLEX)
3451: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3452: #else
3453: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3454: #endif
3455: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3456: B_neigs += B_neigs2;
3457: }
3458: break;
3459: case 3:
3460: if (scal) {
3461: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3462: } else {
3463: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3464: }
3465: if (!scal) {
3466: bb[0] = uthresh;
3467: bb[1] = PETSC_MAX_REAL;
3468: #if defined(PETSC_USE_COMPLEX)
3469: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3470: #else
3471: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3472: #endif
3473: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3474: }
3475: if (recipe_m > 0 && B_N - B_neigs > 0) {
3476: PetscBLASInt B_neigs2 = 0;
3478: B_IL = 1;
3479: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3480: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3481: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3482: #if defined(PETSC_USE_COMPLEX)
3483: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3484: #else
3485: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3486: #endif
3487: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3488: B_neigs += B_neigs2;
3489: }
3490: break;
3491: case 4:
3492: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3493: #if defined(PETSC_USE_COMPLEX)
3494: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3495: #else
3496: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3497: #endif
3498: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3499: {
3500: PetscBLASInt B_neigs2 = 0;
3502: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3503: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3504: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3505: #if defined(PETSC_USE_COMPLEX)
3506: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3507: #else
3508: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3509: #endif
3510: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3511: B_neigs += B_neigs2;
3512: }
3513: break;
3514: case 5: /* same as before: first compute all eigenvalues, then filter */
3515: #if defined(PETSC_USE_COMPLEX)
3516: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3517: #else
3518: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3519: #endif
3520: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3521: {
3522: PetscInt e,k,ne;
3523: for (e=0,ne=0;e<B_neigs;e++) {
3524: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3525: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3526: eigs[ne] = eigs[e];
3527: ne++;
3528: }
3529: }
3530: PetscArraycpy(eigv,S,B_N*ne);
3531: B_neigs = ne;
3532: }
3533: break;
3534: default:
3535: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3536: break;
3537: }
3538: }
3539: } else if (!same_data) { /* this is just to see all the eigenvalues */
3540: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3541: B_IL = 1;
3542: #if defined(PETSC_USE_COMPLEX)
3543: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3544: #else
3545: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3546: #endif
3547: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3548: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3549: PetscInt k;
3550: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3551: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3552: PetscBLASIntCast(nmax,&B_neigs);
3553: nmin = nmax;
3554: PetscArrayzero(eigv,subset_size*nmax);
3555: for (k=0;k<nmax;k++) {
3556: eigs[k] = 1./PETSC_SMALL;
3557: eigv[k*(subset_size+1)] = 1.0;
3558: }
3559: }
3560: PetscFPTrapPop();
3561: if (B_ierr) {
3562: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3563: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3564: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3565: }
3567: if (B_neigs > nmax) {
3568: if (pcbddc->dbg_flag) {
3569: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3570: }
3571: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3572: B_neigs = nmax;
3573: }
3575: nmin_s = PetscMin(nmin,B_N);
3576: if (B_neigs < nmin_s) {
3577: PetscBLASInt B_neigs2 = 0;
3579: if (pcbddc->use_deluxe_scaling) {
3580: if (scal) {
3581: B_IU = nmin_s;
3582: B_IL = B_neigs + 1;
3583: } else {
3584: B_IL = B_N - nmin_s + 1;
3585: B_IU = B_N - B_neigs;
3586: }
3587: } else {
3588: B_IL = B_neigs + 1;
3589: B_IU = nmin_s;
3590: }
3591: if (pcbddc->dbg_flag) {
3592: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3593: }
3594: if (sub_schurs->is_symmetric) {
3595: PetscInt j,k;
3596: for (j=0;j<subset_size;j++) {
3597: for (k=j;k<subset_size;k++) {
3598: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3599: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3600: }
3601: }
3602: } else {
3603: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3604: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3605: }
3606: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3607: #if defined(PETSC_USE_COMPLEX)
3608: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3609: #else
3610: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3611: #endif
3612: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3613: PetscFPTrapPop();
3614: B_neigs += B_neigs2;
3615: }
3616: if (B_ierr) {
3617: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3618: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3619: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3620: }
3621: if (pcbddc->dbg_flag) {
3622: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3623: for (j=0;j<B_neigs;j++) {
3624: if (eigs[j] == 0.0) {
3625: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3626: } else {
3627: if (pcbddc->use_deluxe_scaling) {
3628: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3629: } else {
3630: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3631: }
3632: }
3633: }
3634: }
3635: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3636: }
3637: /* change the basis back to the original one */
3638: if (sub_schurs->change) {
3639: Mat change,phi,phit;
3641: if (pcbddc->dbg_flag > 2) {
3642: PetscInt ii;
3643: for (ii=0;ii<B_neigs;ii++) {
3644: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3645: for (j=0;j<B_N;j++) {
3646: #if defined(PETSC_USE_COMPLEX)
3647: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3648: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3649: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3650: #else
3651: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3652: #endif
3653: }
3654: }
3655: }
3656: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3657: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3658: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3659: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3660: MatDestroy(&phit);
3661: MatDestroy(&phi);
3662: }
3663: maxneigs = PetscMax(B_neigs,maxneigs);
3664: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3665: if (B_neigs) {
3666: PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);
3668: if (pcbddc->dbg_flag > 1) {
3669: PetscInt ii;
3670: for (ii=0;ii<B_neigs;ii++) {
3671: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3672: for (j=0;j<B_N;j++) {
3673: #if defined(PETSC_USE_COMPLEX)
3674: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3675: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3676: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3677: #else
3678: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3679: #endif
3680: }
3681: }
3682: }
3683: PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3684: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3685: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3686: cum++;
3687: }
3688: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3689: /* shift for next computation */
3690: cumarray += subset_size*subset_size;
3691: }
3692: if (pcbddc->dbg_flag) {
3693: PetscViewerFlush(pcbddc->dbg_viewer);
3694: }
3696: if (mss) {
3697: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3698: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3699: /* destroy matrices (junk) */
3700: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3701: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3702: }
3703: if (allocated_S_St) {
3704: PetscFree2(S,St);
3705: }
3706: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3707: #if defined(PETSC_USE_COMPLEX)
3708: PetscFree(rwork);
3709: #endif
3710: if (pcbddc->dbg_flag) {
3711: PetscInt maxneigs_r;
3712: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3713: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3714: }
3715: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3716: return(0);
3717: }
3719: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3720: {
3721: PetscScalar *coarse_submat_vals;
3725: /* Setup local scatters R_to_B and (optionally) R_to_D */
3726: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3727: PCBDDCSetUpLocalScatters(pc);
3729: /* Setup local neumann solver ksp_R */
3730: /* PCBDDCSetUpLocalScatters should be called first! */
3731: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3733: /*
3734: Setup local correction and local part of coarse basis.
3735: Gives back the dense local part of the coarse matrix in column major ordering
3736: */
3737: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3739: /* Compute total number of coarse nodes and setup coarse solver */
3740: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3742: /* free */
3743: PetscFree(coarse_submat_vals);
3744: return(0);
3745: }
3747: PetscErrorCode PCBDDCResetCustomization(PC pc)
3748: {
3749: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3753: ISDestroy(&pcbddc->user_primal_vertices);
3754: ISDestroy(&pcbddc->user_primal_vertices_local);
3755: ISDestroy(&pcbddc->NeumannBoundaries);
3756: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3757: ISDestroy(&pcbddc->DirichletBoundaries);
3758: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3759: PetscFree(pcbddc->onearnullvecs_state);
3760: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3761: PCBDDCSetDofsSplitting(pc,0,NULL);
3762: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3763: return(0);
3764: }
3766: PetscErrorCode PCBDDCResetTopography(PC pc)
3767: {
3768: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3769: PetscInt i;
3773: MatDestroy(&pcbddc->nedcG);
3774: ISDestroy(&pcbddc->nedclocal);
3775: MatDestroy(&pcbddc->discretegradient);
3776: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3777: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3778: MatDestroy(&pcbddc->switch_static_change);
3779: VecDestroy(&pcbddc->work_change);
3780: MatDestroy(&pcbddc->ConstraintMatrix);
3781: MatDestroy(&pcbddc->divudotp);
3782: ISDestroy(&pcbddc->divudotp_vl2l);
3783: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3784: for (i=0;i<pcbddc->n_local_subs;i++) {
3785: ISDestroy(&pcbddc->local_subs[i]);
3786: }
3787: pcbddc->n_local_subs = 0;
3788: PetscFree(pcbddc->local_subs);
3789: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3790: pcbddc->graphanalyzed = PETSC_FALSE;
3791: pcbddc->recompute_topography = PETSC_TRUE;
3792: pcbddc->corner_selected = PETSC_FALSE;
3793: return(0);
3794: }
3796: PetscErrorCode PCBDDCResetSolvers(PC pc)
3797: {
3798: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3802: VecDestroy(&pcbddc->coarse_vec);
3803: if (pcbddc->coarse_phi_B) {
3804: PetscScalar *array;
3805: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3806: PetscFree(array);
3807: }
3808: MatDestroy(&pcbddc->coarse_phi_B);
3809: MatDestroy(&pcbddc->coarse_phi_D);
3810: MatDestroy(&pcbddc->coarse_psi_B);
3811: MatDestroy(&pcbddc->coarse_psi_D);
3812: VecDestroy(&pcbddc->vec1_P);
3813: VecDestroy(&pcbddc->vec1_C);
3814: MatDestroy(&pcbddc->local_auxmat2);
3815: MatDestroy(&pcbddc->local_auxmat1);
3816: VecDestroy(&pcbddc->vec1_R);
3817: VecDestroy(&pcbddc->vec2_R);
3818: ISDestroy(&pcbddc->is_R_local);
3819: VecScatterDestroy(&pcbddc->R_to_B);
3820: VecScatterDestroy(&pcbddc->R_to_D);
3821: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3822: KSPReset(pcbddc->ksp_D);
3823: KSPReset(pcbddc->ksp_R);
3824: KSPReset(pcbddc->coarse_ksp);
3825: MatDestroy(&pcbddc->local_mat);
3826: PetscFree(pcbddc->primal_indices_local_idxs);
3827: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3828: PetscFree(pcbddc->global_primal_indices);
3829: ISDestroy(&pcbddc->coarse_subassembling);
3830: MatDestroy(&pcbddc->benign_change);
3831: VecDestroy(&pcbddc->benign_vec);
3832: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3833: MatDestroy(&pcbddc->benign_B0);
3834: PetscSFDestroy(&pcbddc->benign_sf);
3835: if (pcbddc->benign_zerodiag_subs) {
3836: PetscInt i;
3837: for (i=0;i<pcbddc->benign_n;i++) {
3838: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3839: }
3840: PetscFree(pcbddc->benign_zerodiag_subs);
3841: }
3842: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3843: return(0);
3844: }
3846: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3847: {
3848: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3849: PC_IS *pcis = (PC_IS*)pc->data;
3850: VecType impVecType;
3851: PetscInt n_constraints,n_R,old_size;
3855: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3856: n_R = pcis->n - pcbddc->n_vertices;
3857: VecGetType(pcis->vec1_N,&impVecType);
3858: /* local work vectors (try to avoid unneeded work)*/
3859: /* R nodes */
3860: old_size = -1;
3861: if (pcbddc->vec1_R) {
3862: VecGetSize(pcbddc->vec1_R,&old_size);
3863: }
3864: if (n_R != old_size) {
3865: VecDestroy(&pcbddc->vec1_R);
3866: VecDestroy(&pcbddc->vec2_R);
3867: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3868: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3869: VecSetType(pcbddc->vec1_R,impVecType);
3870: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3871: }
3872: /* local primal dofs */
3873: old_size = -1;
3874: if (pcbddc->vec1_P) {
3875: VecGetSize(pcbddc->vec1_P,&old_size);
3876: }
3877: if (pcbddc->local_primal_size != old_size) {
3878: VecDestroy(&pcbddc->vec1_P);
3879: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3880: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3881: VecSetType(pcbddc->vec1_P,impVecType);
3882: }
3883: /* local explicit constraints */
3884: old_size = -1;
3885: if (pcbddc->vec1_C) {
3886: VecGetSize(pcbddc->vec1_C,&old_size);
3887: }
3888: if (n_constraints && n_constraints != old_size) {
3889: VecDestroy(&pcbddc->vec1_C);
3890: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3891: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3892: VecSetType(pcbddc->vec1_C,impVecType);
3893: }
3894: return(0);
3895: }
3897: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3898: {
3899: PetscErrorCode ierr;
3900: /* pointers to pcis and pcbddc */
3901: PC_IS* pcis = (PC_IS*)pc->data;
3902: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3903: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3904: /* submatrices of local problem */
3905: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3906: /* submatrices of local coarse problem */
3907: Mat S_VV,S_CV,S_VC,S_CC;
3908: /* working matrices */
3909: Mat C_CR;
3910: /* additional working stuff */
3911: PC pc_R;
3912: Mat F,Brhs = NULL;
3913: Vec dummy_vec;
3914: PetscBool isLU,isCHOL,need_benign_correction,sparserhs;
3915: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3916: PetscScalar *work;
3917: PetscInt *idx_V_B;
3918: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3919: PetscInt i,n_R,n_D,n_B;
3920: PetscScalar one=1.0,m_one=-1.0;
3923: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3924: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3926: /* Set Non-overlapping dimensions */
3927: n_vertices = pcbddc->n_vertices;
3928: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3929: n_B = pcis->n_B;
3930: n_D = pcis->n - n_B;
3931: n_R = pcis->n - n_vertices;
3933: /* vertices in boundary numbering */
3934: PetscMalloc1(n_vertices,&idx_V_B);
3935: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3936: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3938: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3939: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3940: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3941: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3942: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3943: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3944: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3945: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3946: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3947: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3949: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3950: KSPGetPC(pcbddc->ksp_R,&pc_R);
3951: PCSetUp(pc_R);
3952: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3953: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3954: lda_rhs = n_R;
3955: need_benign_correction = PETSC_FALSE;
3956: if (isLU || isCHOL) {
3957: PCFactorGetMatrix(pc_R,&F);
3958: } else if (sub_schurs && sub_schurs->reuse_solver) {
3959: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3960: MatFactorType type;
3962: F = reuse_solver->F;
3963: MatGetFactorType(F,&type);
3964: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3965: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3966: MatGetSize(F,&lda_rhs,NULL);
3967: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3968: } else F = NULL;
3970: /* determine if we can use a sparse right-hand side */
3971: sparserhs = PETSC_FALSE;
3972: if (F) {
3973: MatSolverType solver;
3975: MatFactorGetSolverType(F,&solver);
3976: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3977: }
3979: /* allocate workspace */
3980: n = 0;
3981: if (n_constraints) {
3982: n += lda_rhs*n_constraints;
3983: }
3984: if (n_vertices) {
3985: n = PetscMax(2*lda_rhs*n_vertices,n);
3986: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3987: }
3988: if (!pcbddc->symmetric_primal) {
3989: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3990: }
3991: PetscMalloc1(n,&work);
3993: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3994: dummy_vec = NULL;
3995: if (need_benign_correction && lda_rhs != n_R && F) {
3996: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3997: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3998: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3999: }
4001: MatDestroy(&pcbddc->local_auxmat1);
4002: MatDestroy(&pcbddc->local_auxmat2);
4004: /* Precompute stuffs needed for preprocessing and application of BDDC*/
4005: if (n_constraints) {
4006: Mat M3,C_B;
4007: IS is_aux;
4008: PetscScalar *array,*array2;
4010: /* Extract constraints on R nodes: C_{CR} */
4011: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4012: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4013: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4015: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4016: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4017: if (!sparserhs) {
4018: PetscArrayzero(work,lda_rhs*n_constraints);
4019: for (i=0;i<n_constraints;i++) {
4020: const PetscScalar *row_cmat_values;
4021: const PetscInt *row_cmat_indices;
4022: PetscInt size_of_constraint,j;
4024: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4025: for (j=0;j<size_of_constraint;j++) {
4026: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4027: }
4028: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4029: }
4030: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4031: } else {
4032: Mat tC_CR;
4034: MatScale(C_CR,-1.0);
4035: if (lda_rhs != n_R) {
4036: PetscScalar *aa;
4037: PetscInt r,*ii,*jj;
4038: PetscBool done;
4040: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4041: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4042: MatSeqAIJGetArray(C_CR,&aa);
4043: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4044: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4045: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4046: } else {
4047: PetscObjectReference((PetscObject)C_CR);
4048: tC_CR = C_CR;
4049: }
4050: MatCreateTranspose(tC_CR,&Brhs);
4051: MatDestroy(&tC_CR);
4052: }
4053: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4054: if (F) {
4055: if (need_benign_correction) {
4056: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4058: /* rhs is already zero on interior dofs, no need to change the rhs */
4059: PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4060: }
4061: MatMatSolve(F,Brhs,local_auxmat2_R);
4062: if (need_benign_correction) {
4063: PetscScalar *marr;
4064: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4066: MatDenseGetArray(local_auxmat2_R,&marr);
4067: if (lda_rhs != n_R) {
4068: for (i=0;i<n_constraints;i++) {
4069: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4070: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4071: VecResetArray(dummy_vec);
4072: }
4073: } else {
4074: for (i=0;i<n_constraints;i++) {
4075: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4076: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4077: VecResetArray(pcbddc->vec1_R);
4078: }
4079: }
4080: MatDenseRestoreArray(local_auxmat2_R,&marr);
4081: }
4082: } else {
4083: PetscScalar *marr;
4085: MatDenseGetArray(local_auxmat2_R,&marr);
4086: for (i=0;i<n_constraints;i++) {
4087: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4088: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4089: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4090: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4091: VecResetArray(pcbddc->vec1_R);
4092: VecResetArray(pcbddc->vec2_R);
4093: }
4094: MatDenseRestoreArray(local_auxmat2_R,&marr);
4095: }
4096: if (sparserhs) {
4097: MatScale(C_CR,-1.0);
4098: }
4099: MatDestroy(&Brhs);
4100: if (!pcbddc->switch_static) {
4101: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4102: MatDenseGetArray(pcbddc->local_auxmat2,&array);
4103: MatDenseGetArray(local_auxmat2_R,&array2);
4104: for (i=0;i<n_constraints;i++) {
4105: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4106: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4107: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4108: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4109: VecResetArray(pcis->vec1_B);
4110: VecResetArray(pcbddc->vec1_R);
4111: }
4112: MatDenseRestoreArray(local_auxmat2_R,&array2);
4113: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4114: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4115: } else {
4116: if (lda_rhs != n_R) {
4117: IS dummy;
4119: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4120: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4121: ISDestroy(&dummy);
4122: } else {
4123: PetscObjectReference((PetscObject)local_auxmat2_R);
4124: pcbddc->local_auxmat2 = local_auxmat2_R;
4125: }
4126: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4127: }
4128: ISDestroy(&is_aux);
4129: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
4130: MatScale(M3,m_one);
4131: if (isCHOL) {
4132: MatCholeskyFactor(M3,NULL,NULL);
4133: } else {
4134: MatLUFactor(M3,NULL,NULL,NULL);
4135: }
4136: MatSeqDenseInvertFactors_Private(M3);
4137: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4138: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4139: MatDestroy(&C_B);
4140: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4141: MatDestroy(&M3);
4142: }
4144: /* Get submatrices from subdomain matrix */
4145: if (n_vertices) {
4146: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4147: PetscBool oldpin;
4148: #endif
4149: PetscBool isaij;
4150: IS is_aux;
4152: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4153: IS tis;
4155: ISDuplicate(pcbddc->is_R_local,&tis);
4156: ISSort(tis);
4157: ISComplement(tis,0,pcis->n,&is_aux);
4158: ISDestroy(&tis);
4159: } else {
4160: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4161: }
4162: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4163: oldpin = pcbddc->local_mat->boundtocpu;
4164: #endif
4165: MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4166: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4167: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4168: PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4169: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4170: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4171: }
4172: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4173: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4174: MatBindToCPU(pcbddc->local_mat,oldpin);
4175: #endif
4176: ISDestroy(&is_aux);
4177: }
4179: /* Matrix of coarse basis functions (local) */
4180: if (pcbddc->coarse_phi_B) {
4181: PetscInt on_B,on_primal,on_D=n_D;
4182: if (pcbddc->coarse_phi_D) {
4183: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4184: }
4185: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4186: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4187: PetscScalar *marray;
4189: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4190: PetscFree(marray);
4191: MatDestroy(&pcbddc->coarse_phi_B);
4192: MatDestroy(&pcbddc->coarse_psi_B);
4193: MatDestroy(&pcbddc->coarse_phi_D);
4194: MatDestroy(&pcbddc->coarse_psi_D);
4195: }
4196: }
4198: if (!pcbddc->coarse_phi_B) {
4199: PetscScalar *marr;
4201: /* memory size */
4202: n = n_B*pcbddc->local_primal_size;
4203: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4204: if (!pcbddc->symmetric_primal) n *= 2;
4205: PetscCalloc1(n,&marr);
4206: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4207: marr += n_B*pcbddc->local_primal_size;
4208: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4209: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4210: marr += n_D*pcbddc->local_primal_size;
4211: }
4212: if (!pcbddc->symmetric_primal) {
4213: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4214: marr += n_B*pcbddc->local_primal_size;
4215: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4216: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4217: }
4218: } else {
4219: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4220: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4221: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4222: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4223: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4224: }
4225: }
4226: }
4228: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4229: p0_lidx_I = NULL;
4230: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4231: const PetscInt *idxs;
4233: ISGetIndices(pcis->is_I_local,&idxs);
4234: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4235: for (i=0;i<pcbddc->benign_n;i++) {
4236: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4237: }
4238: ISRestoreIndices(pcis->is_I_local,&idxs);
4239: }
4241: /* vertices */
4242: if (n_vertices) {
4243: PetscBool restoreavr = PETSC_FALSE;
4245: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4247: if (n_R) {
4248: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4249: PetscBLASInt B_N,B_one = 1;
4250: const PetscScalar *x;
4251: PetscScalar *y;
4253: MatScale(A_RV,m_one);
4254: if (need_benign_correction) {
4255: ISLocalToGlobalMapping RtoN;
4256: IS is_p0;
4257: PetscInt *idxs_p0,n;
4259: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4260: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4261: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4262: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4263: ISLocalToGlobalMappingDestroy(&RtoN);
4264: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4265: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4266: ISDestroy(&is_p0);
4267: }
4269: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4270: if (!sparserhs || need_benign_correction) {
4271: if (lda_rhs == n_R) {
4272: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4273: } else {
4274: PetscScalar *av,*array;
4275: const PetscInt *xadj,*adjncy;
4276: PetscInt n;
4277: PetscBool flg_row;
4279: array = work+lda_rhs*n_vertices;
4280: PetscArrayzero(array,lda_rhs*n_vertices);
4281: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4282: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4283: MatSeqAIJGetArray(A_RV,&av);
4284: for (i=0;i<n;i++) {
4285: PetscInt j;
4286: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4287: }
4288: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4289: MatDestroy(&A_RV);
4290: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4291: }
4292: if (need_benign_correction) {
4293: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4294: PetscScalar *marr;
4296: MatDenseGetArray(A_RV,&marr);
4297: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4299: | 0 0 0 | (V)
4300: L = | 0 0 -1 | (P-p0)
4301: | 0 0 -1 | (p0)
4303: */
4304: for (i=0;i<reuse_solver->benign_n;i++) {
4305: const PetscScalar *vals;
4306: const PetscInt *idxs,*idxs_zero;
4307: PetscInt n,j,nz;
4309: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4310: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4311: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4312: for (j=0;j<n;j++) {
4313: PetscScalar val = vals[j];
4314: PetscInt k,col = idxs[j];
4315: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4316: }
4317: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4318: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4319: }
4320: MatDenseRestoreArray(A_RV,&marr);
4321: }
4322: PetscObjectReference((PetscObject)A_RV);
4323: Brhs = A_RV;
4324: } else {
4325: Mat tA_RVT,A_RVT;
4327: if (!pcbddc->symmetric_primal) {
4328: /* A_RV already scaled by -1 */
4329: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4330: } else {
4331: restoreavr = PETSC_TRUE;
4332: MatScale(A_VR,-1.0);
4333: PetscObjectReference((PetscObject)A_VR);
4334: A_RVT = A_VR;
4335: }
4336: if (lda_rhs != n_R) {
4337: PetscScalar *aa;
4338: PetscInt r,*ii,*jj;
4339: PetscBool done;
4341: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4342: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4343: MatSeqAIJGetArray(A_RVT,&aa);
4344: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4345: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4346: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4347: } else {
4348: PetscObjectReference((PetscObject)A_RVT);
4349: tA_RVT = A_RVT;
4350: }
4351: MatCreateTranspose(tA_RVT,&Brhs);
4352: MatDestroy(&tA_RVT);
4353: MatDestroy(&A_RVT);
4354: }
4355: if (F) {
4356: /* need to correct the rhs */
4357: if (need_benign_correction) {
4358: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4359: PetscScalar *marr;
4361: MatDenseGetArray(Brhs,&marr);
4362: if (lda_rhs != n_R) {
4363: for (i=0;i<n_vertices;i++) {
4364: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4365: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4366: VecResetArray(dummy_vec);
4367: }
4368: } else {
4369: for (i=0;i<n_vertices;i++) {
4370: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4371: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4372: VecResetArray(pcbddc->vec1_R);
4373: }
4374: }
4375: MatDenseRestoreArray(Brhs,&marr);
4376: }
4377: MatMatSolve(F,Brhs,A_RRmA_RV);
4378: if (restoreavr) {
4379: MatScale(A_VR,-1.0);
4380: }
4381: /* need to correct the solution */
4382: if (need_benign_correction) {
4383: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4384: PetscScalar *marr;
4386: MatDenseGetArray(A_RRmA_RV,&marr);
4387: if (lda_rhs != n_R) {
4388: for (i=0;i<n_vertices;i++) {
4389: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4390: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4391: VecResetArray(dummy_vec);
4392: }
4393: } else {
4394: for (i=0;i<n_vertices;i++) {
4395: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4396: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4397: VecResetArray(pcbddc->vec1_R);
4398: }
4399: }
4400: MatDenseRestoreArray(A_RRmA_RV,&marr);
4401: }
4402: } else {
4403: MatDenseGetArray(Brhs,&y);
4404: for (i=0;i<n_vertices;i++) {
4405: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4406: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4407: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4408: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4409: VecResetArray(pcbddc->vec1_R);
4410: VecResetArray(pcbddc->vec2_R);
4411: }
4412: MatDenseRestoreArray(Brhs,&y);
4413: }
4414: MatDestroy(&A_RV);
4415: MatDestroy(&Brhs);
4416: /* S_VV and S_CV */
4417: if (n_constraints) {
4418: Mat B;
4420: PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4421: for (i=0;i<n_vertices;i++) {
4422: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4423: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4424: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4425: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4426: VecResetArray(pcis->vec1_B);
4427: VecResetArray(pcbddc->vec1_R);
4428: }
4429: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4430: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4431: MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4432: MatProductSetType(S_CV,MATPRODUCT_AB);
4433: MatProductSetFromOptions(S_CV);
4434: MatProductNumeric(S_CV);
4435: MatProductClear(S_CV);
4437: MatDestroy(&B);
4438: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4439: /* Reuse B = local_auxmat2_R * S_CV */
4440: MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4441: MatProductSetType(B,MATPRODUCT_AB);
4442: MatProductSetFromOptions(B);
4443: MatProductNumeric(B);
4445: MatScale(S_CV,m_one);
4446: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4447: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4448: MatDestroy(&B);
4449: }
4450: if (lda_rhs != n_R) {
4451: MatDestroy(&A_RRmA_RV);
4452: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4453: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4454: }
4455: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4456: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4457: if (need_benign_correction) {
4458: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4459: PetscScalar *marr,*sums;
4461: PetscMalloc1(n_vertices,&sums);
4462: MatDenseGetArray(S_VVt,&marr);
4463: for (i=0;i<reuse_solver->benign_n;i++) {
4464: const PetscScalar *vals;
4465: const PetscInt *idxs,*idxs_zero;
4466: PetscInt n,j,nz;
4468: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4469: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4470: for (j=0;j<n_vertices;j++) {
4471: PetscInt k;
4472: sums[j] = 0.;
4473: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4474: }
4475: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4476: for (j=0;j<n;j++) {
4477: PetscScalar val = vals[j];
4478: PetscInt k;
4479: for (k=0;k<n_vertices;k++) {
4480: marr[idxs[j]+k*n_vertices] += val*sums[k];
4481: }
4482: }
4483: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4484: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4485: }
4486: PetscFree(sums);
4487: MatDenseRestoreArray(S_VVt,&marr);
4488: MatDestroy(&A_RV_bcorr);
4489: }
4490: MatDestroy(&A_RRmA_RV);
4491: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4492: MatDenseGetArrayRead(A_VV,&x);
4493: MatDenseGetArray(S_VVt,&y);
4494: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4495: MatDenseRestoreArrayRead(A_VV,&x);
4496: MatDenseRestoreArray(S_VVt,&y);
4497: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4498: MatDestroy(&S_VVt);
4499: } else {
4500: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4501: }
4502: MatDestroy(&A_VV);
4504: /* coarse basis functions */
4505: for (i=0;i<n_vertices;i++) {
4506: PetscScalar *y;
4508: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4509: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4510: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4511: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4512: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4513: y[n_B*i+idx_V_B[i]] = 1.0;
4514: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4515: VecResetArray(pcis->vec1_B);
4517: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4518: PetscInt j;
4520: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4521: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4522: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4523: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4524: VecResetArray(pcis->vec1_D);
4525: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4526: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4527: }
4528: VecResetArray(pcbddc->vec1_R);
4529: }
4530: /* if n_R == 0 the object is not destroyed */
4531: MatDestroy(&A_RV);
4532: }
4533: VecDestroy(&dummy_vec);
4535: if (n_constraints) {
4536: Mat B;
4538: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4539: MatScale(S_CC,m_one);
4540: MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4541: MatProductSetType(B,MATPRODUCT_AB);
4542: MatProductSetFromOptions(B);
4543: MatProductNumeric(B);
4545: MatScale(S_CC,m_one);
4546: if (n_vertices) {
4547: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4548: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4549: } else {
4550: Mat S_VCt;
4552: if (lda_rhs != n_R) {
4553: MatDestroy(&B);
4554: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4555: MatSeqDenseSetLDA(B,lda_rhs);
4556: }
4557: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4558: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4559: MatDestroy(&S_VCt);
4560: }
4561: }
4562: MatDestroy(&B);
4563: /* coarse basis functions */
4564: for (i=0;i<n_constraints;i++) {
4565: PetscScalar *y;
4567: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4568: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4569: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4570: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4571: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4572: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4573: VecResetArray(pcis->vec1_B);
4574: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4575: PetscInt j;
4577: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4578: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4579: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4580: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4581: VecResetArray(pcis->vec1_D);
4582: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4583: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4584: }
4585: VecResetArray(pcbddc->vec1_R);
4586: }
4587: }
4588: if (n_constraints) {
4589: MatDestroy(&local_auxmat2_R);
4590: }
4591: PetscFree(p0_lidx_I);
4593: /* coarse matrix entries relative to B_0 */
4594: if (pcbddc->benign_n) {
4595: Mat B0_B,B0_BPHI;
4596: IS is_dummy;
4597: const PetscScalar *data;
4598: PetscInt j;
4600: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4601: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4602: ISDestroy(&is_dummy);
4603: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4604: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4605: MatDenseGetArrayRead(B0_BPHI,&data);
4606: for (j=0;j<pcbddc->benign_n;j++) {
4607: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4608: for (i=0;i<pcbddc->local_primal_size;i++) {
4609: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4610: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4611: }
4612: }
4613: MatDenseRestoreArrayRead(B0_BPHI,&data);
4614: MatDestroy(&B0_B);
4615: MatDestroy(&B0_BPHI);
4616: }
4618: /* compute other basis functions for non-symmetric problems */
4619: if (!pcbddc->symmetric_primal) {
4620: Mat B_V=NULL,B_C=NULL;
4621: PetscScalar *marray;
4623: if (n_constraints) {
4624: Mat S_CCT,C_CRT;
4626: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4627: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4628: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4629: MatDestroy(&S_CCT);
4630: if (n_vertices) {
4631: Mat S_VCT;
4633: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4634: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4635: MatDestroy(&S_VCT);
4636: }
4637: MatDestroy(&C_CRT);
4638: } else {
4639: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4640: }
4641: if (n_vertices && n_R) {
4642: PetscScalar *av,*marray;
4643: const PetscInt *xadj,*adjncy;
4644: PetscInt n;
4645: PetscBool flg_row;
4647: /* B_V = B_V - A_VR^T */
4648: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4649: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4650: MatSeqAIJGetArray(A_VR,&av);
4651: MatDenseGetArray(B_V,&marray);
4652: for (i=0;i<n;i++) {
4653: PetscInt j;
4654: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4655: }
4656: MatDenseRestoreArray(B_V,&marray);
4657: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4658: MatDestroy(&A_VR);
4659: }
4661: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4662: if (n_vertices) {
4663: MatDenseGetArray(B_V,&marray);
4664: for (i=0;i<n_vertices;i++) {
4665: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4666: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4667: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4668: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4669: VecResetArray(pcbddc->vec1_R);
4670: VecResetArray(pcbddc->vec2_R);
4671: }
4672: MatDenseRestoreArray(B_V,&marray);
4673: }
4674: if (B_C) {
4675: MatDenseGetArray(B_C,&marray);
4676: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4677: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4678: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4679: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4680: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4681: VecResetArray(pcbddc->vec1_R);
4682: VecResetArray(pcbddc->vec2_R);
4683: }
4684: MatDenseRestoreArray(B_C,&marray);
4685: }
4686: /* coarse basis functions */
4687: for (i=0;i<pcbddc->local_primal_size;i++) {
4688: PetscScalar *y;
4690: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4691: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4692: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4693: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4694: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4695: if (i<n_vertices) {
4696: y[n_B*i+idx_V_B[i]] = 1.0;
4697: }
4698: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4699: VecResetArray(pcis->vec1_B);
4701: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4702: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4703: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4704: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4705: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4706: VecResetArray(pcis->vec1_D);
4707: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4708: }
4709: VecResetArray(pcbddc->vec1_R);
4710: }
4711: MatDestroy(&B_V);
4712: MatDestroy(&B_C);
4713: }
4715: /* free memory */
4716: PetscFree(idx_V_B);
4717: MatDestroy(&S_VV);
4718: MatDestroy(&S_CV);
4719: MatDestroy(&S_VC);
4720: MatDestroy(&S_CC);
4721: PetscFree(work);
4722: if (n_vertices) {
4723: MatDestroy(&A_VR);
4724: }
4725: if (n_constraints) {
4726: MatDestroy(&C_CR);
4727: }
4728: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4730: /* Checking coarse_sub_mat and coarse basis functions */
4731: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4732: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4733: if (pcbddc->dbg_flag) {
4734: Mat coarse_sub_mat;
4735: Mat AUXMAT,TM1,TM2,TM3,TM4;
4736: Mat coarse_phi_D,coarse_phi_B;
4737: Mat coarse_psi_D,coarse_psi_B;
4738: Mat A_II,A_BB,A_IB,A_BI;
4739: Mat C_B,CPHI;
4740: IS is_dummy;
4741: Vec mones;
4742: MatType checkmattype=MATSEQAIJ;
4743: PetscReal real_value;
4745: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4746: Mat A;
4747: PCBDDCBenignProject(pc,NULL,NULL,&A);
4748: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4749: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4750: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4751: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4752: MatDestroy(&A);
4753: } else {
4754: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4755: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4756: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4757: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4758: }
4759: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4760: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4761: if (!pcbddc->symmetric_primal) {
4762: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4763: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4764: }
4765: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4767: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4768: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4769: PetscViewerFlush(pcbddc->dbg_viewer);
4770: if (!pcbddc->symmetric_primal) {
4771: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4772: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4773: MatDestroy(&AUXMAT);
4774: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4775: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4776: MatDestroy(&AUXMAT);
4777: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4778: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4779: MatDestroy(&AUXMAT);
4780: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4781: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4782: MatDestroy(&AUXMAT);
4783: } else {
4784: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4785: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4786: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4787: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4788: MatDestroy(&AUXMAT);
4789: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4790: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4791: MatDestroy(&AUXMAT);
4792: }
4793: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4794: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4795: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4796: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4797: if (pcbddc->benign_n) {
4798: Mat B0_B,B0_BPHI;
4799: const PetscScalar *data2;
4800: PetscScalar *data;
4801: PetscInt j;
4803: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4804: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4805: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4806: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4807: MatDenseGetArray(TM1,&data);
4808: MatDenseGetArrayRead(B0_BPHI,&data2);
4809: for (j=0;j<pcbddc->benign_n;j++) {
4810: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4811: for (i=0;i<pcbddc->local_primal_size;i++) {
4812: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4813: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4814: }
4815: }
4816: MatDenseRestoreArray(TM1,&data);
4817: MatDenseRestoreArrayRead(B0_BPHI,&data2);
4818: MatDestroy(&B0_B);
4819: ISDestroy(&is_dummy);
4820: MatDestroy(&B0_BPHI);
4821: }
4822: #if 0
4823: {
4824: PetscViewer viewer;
4825: char filename[256];
4826: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4827: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4828: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4829: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4830: MatView(coarse_sub_mat,viewer);
4831: PetscObjectSetName((PetscObject)TM1,"projected");
4832: MatView(TM1,viewer);
4833: if (pcbddc->coarse_phi_B) {
4834: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4835: MatView(pcbddc->coarse_phi_B,viewer);
4836: }
4837: if (pcbddc->coarse_phi_D) {
4838: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4839: MatView(pcbddc->coarse_phi_D,viewer);
4840: }
4841: if (pcbddc->coarse_psi_B) {
4842: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4843: MatView(pcbddc->coarse_psi_B,viewer);
4844: }
4845: if (pcbddc->coarse_psi_D) {
4846: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4847: MatView(pcbddc->coarse_psi_D,viewer);
4848: }
4849: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4850: MatView(pcbddc->local_mat,viewer);
4851: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4852: MatView(pcbddc->ConstraintMatrix,viewer);
4853: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4854: ISView(pcis->is_I_local,viewer);
4855: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4856: ISView(pcis->is_B_local,viewer);
4857: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4858: ISView(pcbddc->is_R_local,viewer);
4859: PetscViewerDestroy(&viewer);
4860: }
4861: #endif
4862: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4863: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4864: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4865: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4867: /* check constraints */
4868: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4869: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4870: if (!pcbddc->benign_n) { /* TODO: add benign case */
4871: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4872: } else {
4873: PetscScalar *data;
4874: Mat tmat;
4875: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4876: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4877: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4878: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4879: MatDestroy(&tmat);
4880: }
4881: MatCreateVecs(CPHI,&mones,NULL);
4882: VecSet(mones,-1.0);
4883: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4884: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4885: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4886: if (!pcbddc->symmetric_primal) {
4887: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4888: VecSet(mones,-1.0);
4889: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4890: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4891: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4892: }
4893: MatDestroy(&C_B);
4894: MatDestroy(&CPHI);
4895: ISDestroy(&is_dummy);
4896: VecDestroy(&mones);
4897: PetscViewerFlush(pcbddc->dbg_viewer);
4898: MatDestroy(&A_II);
4899: MatDestroy(&A_BB);
4900: MatDestroy(&A_IB);
4901: MatDestroy(&A_BI);
4902: MatDestroy(&TM1);
4903: MatDestroy(&TM2);
4904: MatDestroy(&TM3);
4905: MatDestroy(&TM4);
4906: MatDestroy(&coarse_phi_D);
4907: MatDestroy(&coarse_phi_B);
4908: if (!pcbddc->symmetric_primal) {
4909: MatDestroy(&coarse_psi_D);
4910: MatDestroy(&coarse_psi_B);
4911: }
4912: MatDestroy(&coarse_sub_mat);
4913: }
4914: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors) */
4915: {
4916: PetscBool gpu;
4918: PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4919: if (gpu) {
4920: if (pcbddc->local_auxmat1) {
4921: MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4922: }
4923: if (pcbddc->local_auxmat2) {
4924: MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4925: }
4926: if (pcbddc->coarse_phi_B) {
4927: MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4928: }
4929: if (pcbddc->coarse_phi_D) {
4930: MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4931: }
4932: if (pcbddc->coarse_psi_B) {
4933: MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4934: }
4935: if (pcbddc->coarse_psi_D) {
4936: MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4937: }
4938: }
4939: }
4940: /* get back data */
4941: *coarse_submat_vals_n = coarse_submat_vals;
4942: return(0);
4943: }
4945: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4946: {
4947: Mat *work_mat;
4948: IS isrow_s,iscol_s;
4949: PetscBool rsorted,csorted;
4950: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4954: ISSorted(isrow,&rsorted);
4955: ISSorted(iscol,&csorted);
4956: ISGetLocalSize(isrow,&rsize);
4957: ISGetLocalSize(iscol,&csize);
4959: if (!rsorted) {
4960: const PetscInt *idxs;
4961: PetscInt *idxs_sorted,i;
4963: PetscMalloc1(rsize,&idxs_perm_r);
4964: PetscMalloc1(rsize,&idxs_sorted);
4965: for (i=0;i<rsize;i++) {
4966: idxs_perm_r[i] = i;
4967: }
4968: ISGetIndices(isrow,&idxs);
4969: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4970: for (i=0;i<rsize;i++) {
4971: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4972: }
4973: ISRestoreIndices(isrow,&idxs);
4974: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4975: } else {
4976: PetscObjectReference((PetscObject)isrow);
4977: isrow_s = isrow;
4978: }
4980: if (!csorted) {
4981: if (isrow == iscol) {
4982: PetscObjectReference((PetscObject)isrow_s);
4983: iscol_s = isrow_s;
4984: } else {
4985: const PetscInt *idxs;
4986: PetscInt *idxs_sorted,i;
4988: PetscMalloc1(csize,&idxs_perm_c);
4989: PetscMalloc1(csize,&idxs_sorted);
4990: for (i=0;i<csize;i++) {
4991: idxs_perm_c[i] = i;
4992: }
4993: ISGetIndices(iscol,&idxs);
4994: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4995: for (i=0;i<csize;i++) {
4996: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4997: }
4998: ISRestoreIndices(iscol,&idxs);
4999: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
5000: }
5001: } else {
5002: PetscObjectReference((PetscObject)iscol);
5003: iscol_s = iscol;
5004: }
5006: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
5008: if (!rsorted || !csorted) {
5009: Mat new_mat;
5010: IS is_perm_r,is_perm_c;
5012: if (!rsorted) {
5013: PetscInt *idxs_r,i;
5014: PetscMalloc1(rsize,&idxs_r);
5015: for (i=0;i<rsize;i++) {
5016: idxs_r[idxs_perm_r[i]] = i;
5017: }
5018: PetscFree(idxs_perm_r);
5019: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
5020: } else {
5021: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
5022: }
5023: ISSetPermutation(is_perm_r);
5025: if (!csorted) {
5026: if (isrow_s == iscol_s) {
5027: PetscObjectReference((PetscObject)is_perm_r);
5028: is_perm_c = is_perm_r;
5029: } else {
5030: PetscInt *idxs_c,i;
5031: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5032: PetscMalloc1(csize,&idxs_c);
5033: for (i=0;i<csize;i++) {
5034: idxs_c[idxs_perm_c[i]] = i;
5035: }
5036: PetscFree(idxs_perm_c);
5037: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5038: }
5039: } else {
5040: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5041: }
5042: ISSetPermutation(is_perm_c);
5044: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5045: MatDestroy(&work_mat[0]);
5046: work_mat[0] = new_mat;
5047: ISDestroy(&is_perm_r);
5048: ISDestroy(&is_perm_c);
5049: }
5051: PetscObjectReference((PetscObject)work_mat[0]);
5052: *B = work_mat[0];
5053: MatDestroyMatrices(1,&work_mat);
5054: ISDestroy(&isrow_s);
5055: ISDestroy(&iscol_s);
5056: return(0);
5057: }
5059: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5060: {
5061: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5062: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5063: Mat new_mat,lA;
5064: IS is_local,is_global;
5065: PetscInt local_size;
5066: PetscBool isseqaij;
5070: MatDestroy(&pcbddc->local_mat);
5071: MatGetSize(matis->A,&local_size,NULL);
5072: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5073: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5074: ISDestroy(&is_local);
5075: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5076: ISDestroy(&is_global);
5078: if (pcbddc->dbg_flag) {
5079: Vec x,x_change;
5080: PetscReal error;
5082: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5083: VecSetRandom(x,NULL);
5084: MatMult(ChangeOfBasisMatrix,x,x_change);
5085: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5086: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5087: MatMult(new_mat,matis->x,matis->y);
5088: if (!pcbddc->change_interior) {
5089: const PetscScalar *x,*y,*v;
5090: PetscReal lerror = 0.;
5091: PetscInt i;
5093: VecGetArrayRead(matis->x,&x);
5094: VecGetArrayRead(matis->y,&y);
5095: VecGetArrayRead(matis->counter,&v);
5096: for (i=0;i<local_size;i++)
5097: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5098: lerror = PetscAbsScalar(x[i]-y[i]);
5099: VecRestoreArrayRead(matis->x,&x);
5100: VecRestoreArrayRead(matis->y,&y);
5101: VecRestoreArrayRead(matis->counter,&v);
5102: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5103: if (error > PETSC_SMALL) {
5104: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5105: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5106: } else {
5107: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5108: }
5109: }
5110: }
5111: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5112: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5113: VecAXPY(x,-1.0,x_change);
5114: VecNorm(x,NORM_INFINITY,&error);
5115: if (error > PETSC_SMALL) {
5116: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5117: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5118: } else {
5119: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5120: }
5121: }
5122: VecDestroy(&x);
5123: VecDestroy(&x_change);
5124: }
5126: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5127: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5129: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5130: PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5131: if (isseqaij) {
5132: MatDestroy(&pcbddc->local_mat);
5133: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5134: if (lA) {
5135: Mat work;
5136: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5137: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5138: MatDestroy(&work);
5139: }
5140: } else {
5141: Mat work_mat;
5143: MatDestroy(&pcbddc->local_mat);
5144: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5145: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5146: MatDestroy(&work_mat);
5147: if (lA) {
5148: Mat work;
5149: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5150: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5151: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5152: MatDestroy(&work);
5153: }
5154: }
5155: if (matis->A->symmetric_set) {
5156: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5157: #if !defined(PETSC_USE_COMPLEX)
5158: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5159: #endif
5160: }
5161: MatDestroy(&new_mat);
5162: return(0);
5163: }
5165: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5166: {
5167: PC_IS* pcis = (PC_IS*)(pc->data);
5168: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5169: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5170: PetscInt *idx_R_local=NULL;
5171: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5172: PetscInt vbs,bs;
5173: PetscBT bitmask=NULL;
5174: PetscErrorCode ierr;
5177: /*
5178: No need to setup local scatters if
5179: - primal space is unchanged
5180: AND
5181: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5182: AND
5183: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
5184: */
5185: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5186: return(0);
5187: }
5188: /* destroy old objects */
5189: ISDestroy(&pcbddc->is_R_local);
5190: VecScatterDestroy(&pcbddc->R_to_B);
5191: VecScatterDestroy(&pcbddc->R_to_D);
5192: /* Set Non-overlapping dimensions */
5193: n_B = pcis->n_B;
5194: n_D = pcis->n - n_B;
5195: n_vertices = pcbddc->n_vertices;
5197: /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
5199: /* create auxiliary bitmask and allocate workspace */
5200: if (!sub_schurs || !sub_schurs->reuse_solver) {
5201: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5202: PetscBTCreate(pcis->n,&bitmask);
5203: for (i=0;i<n_vertices;i++) {
5204: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5205: }
5207: for (i=0, n_R=0; i<pcis->n; i++) {
5208: if (!PetscBTLookup(bitmask,i)) {
5209: idx_R_local[n_R++] = i;
5210: }
5211: }
5212: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5213: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5215: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5216: ISGetLocalSize(reuse_solver->is_R,&n_R);
5217: }
5219: /* Block code */
5220: vbs = 1;
5221: MatGetBlockSize(pcbddc->local_mat,&bs);
5222: if (bs>1 && !(n_vertices%bs)) {
5223: PetscBool is_blocked = PETSC_TRUE;
5224: PetscInt *vary;
5225: if (!sub_schurs || !sub_schurs->reuse_solver) {
5226: PetscMalloc1(pcis->n/bs,&vary);
5227: PetscArrayzero(vary,pcis->n/bs);
5228: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5229: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5230: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5231: for (i=0; i<pcis->n/bs; i++) {
5232: if (vary[i]!=0 && vary[i]!=bs) {
5233: is_blocked = PETSC_FALSE;
5234: break;
5235: }
5236: }
5237: PetscFree(vary);
5238: } else {
5239: /* Verify directly the R set */
5240: for (i=0; i<n_R/bs; i++) {
5241: PetscInt j,node=idx_R_local[bs*i];
5242: for (j=1; j<bs; j++) {
5243: if (node != idx_R_local[bs*i+j]-j) {
5244: is_blocked = PETSC_FALSE;
5245: break;
5246: }
5247: }
5248: }
5249: }
5250: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5251: vbs = bs;
5252: for (i=0;i<n_R/vbs;i++) {
5253: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5254: }
5255: }
5256: }
5257: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5258: if (sub_schurs && sub_schurs->reuse_solver) {
5259: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5261: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5262: ISDestroy(&reuse_solver->is_R);
5263: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5264: reuse_solver->is_R = pcbddc->is_R_local;
5265: } else {
5266: PetscFree(idx_R_local);
5267: }
5269: /* print some info if requested */
5270: if (pcbddc->dbg_flag) {
5271: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5272: PetscViewerFlush(pcbddc->dbg_viewer);
5273: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5274: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5275: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5276: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5277: PetscViewerFlush(pcbddc->dbg_viewer);
5278: }
5280: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5281: if (!sub_schurs || !sub_schurs->reuse_solver) {
5282: IS is_aux1,is_aux2;
5283: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5285: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5286: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5287: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5288: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5289: for (i=0; i<n_D; i++) {
5290: PetscBTSet(bitmask,is_indices[i]);
5291: }
5292: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5293: for (i=0, j=0; i<n_R; i++) {
5294: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5295: aux_array1[j++] = i;
5296: }
5297: }
5298: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5299: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5300: for (i=0, j=0; i<n_B; i++) {
5301: if (!PetscBTLookup(bitmask,is_indices[i])) {
5302: aux_array2[j++] = i;
5303: }
5304: }
5305: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5306: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5307: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5308: ISDestroy(&is_aux1);
5309: ISDestroy(&is_aux2);
5311: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5312: PetscMalloc1(n_D,&aux_array1);
5313: for (i=0, j=0; i<n_R; i++) {
5314: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5315: aux_array1[j++] = i;
5316: }
5317: }
5318: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5319: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5320: ISDestroy(&is_aux1);
5321: }
5322: PetscBTDestroy(&bitmask);
5323: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5324: } else {
5325: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5326: IS tis;
5327: PetscInt schur_size;
5329: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5330: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5331: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5332: ISDestroy(&tis);
5333: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5334: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5335: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5336: ISDestroy(&tis);
5337: }
5338: }
5339: return(0);
5340: }
5342: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5343: {
5344: MatNullSpace NullSpace;
5345: Mat dmat;
5346: const Vec *nullvecs;
5347: Vec v,v2,*nullvecs2;
5348: VecScatter sct = NULL;
5349: PetscContainer c;
5350: PetscScalar *ddata;
5351: PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs;
5352: PetscBool nnsp_has_cnst;
5356: if (!is && !B) { /* MATIS */
5357: Mat_IS* matis = (Mat_IS*)A->data;
5359: if (!B) {
5360: MatISGetLocalMat(A,&B);
5361: }
5362: sct = matis->cctx;
5363: PetscObjectReference((PetscObject)sct);
5364: } else {
5365: MatGetNullSpace(B,&NullSpace);
5366: if (!NullSpace) {
5367: MatGetNearNullSpace(B,&NullSpace);
5368: }
5369: if (NullSpace) return(0);
5370: }
5371: MatGetNullSpace(A,&NullSpace);
5372: if (!NullSpace) {
5373: MatGetNearNullSpace(A,&NullSpace);
5374: }
5375: if (!NullSpace) return(0);
5377: MatCreateVecs(A,&v,NULL);
5378: MatCreateVecs(B,&v2,NULL);
5379: if (!sct) {
5380: VecScatterCreate(v,is,v2,NULL,&sct);
5381: }
5382: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5383: bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5384: PetscMalloc1(bsiz,&nullvecs2);
5385: VecGetBlockSize(v2,&bs);
5386: VecGetSize(v2,&N);
5387: VecGetLocalSize(v2,&n);
5388: PetscMalloc1(n*bsiz,&ddata);
5389: for (k=0;k<nnsp_size;k++) {
5390: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5391: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5392: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5393: }
5394: if (nnsp_has_cnst) {
5395: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5396: VecSet(nullvecs2[nnsp_size],1.0);
5397: }
5398: PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5399: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);
5401: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
5402: PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5403: PetscContainerSetPointer(c,ddata);
5404: PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5405: PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5406: PetscContainerDestroy(&c);
5407: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5408: MatDestroy(&dmat);
5410: for (k=0;k<bsiz;k++) {
5411: VecDestroy(&nullvecs2[k]);
5412: }
5413: PetscFree(nullvecs2);
5414: MatSetNearNullSpace(B,NullSpace);
5415: MatNullSpaceDestroy(&NullSpace);
5416: VecDestroy(&v);
5417: VecDestroy(&v2);
5418: VecScatterDestroy(&sct);
5419: return(0);
5420: }
5422: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5423: {
5424: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5425: PC_IS *pcis = (PC_IS*)pc->data;
5426: PC pc_temp;
5427: Mat A_RR;
5428: MatNullSpace nnsp;
5429: MatReuse reuse;
5430: PetscScalar m_one = -1.0;
5431: PetscReal value;
5432: PetscInt n_D,n_R;
5433: PetscBool issbaij,opts;
5435: void (*f)(void) = 0;
5436: char dir_prefix[256],neu_prefix[256],str_level[16];
5437: size_t len;
5440: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5441: /* approximate solver, propagate NearNullSpace if needed */
5442: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5443: MatNullSpace gnnsp1,gnnsp2;
5444: PetscBool lhas,ghas;
5446: MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5447: MatGetNearNullSpace(pc->pmat,&gnnsp1);
5448: MatGetNullSpace(pc->pmat,&gnnsp2);
5449: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5450: MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
5451: if (!ghas && (gnnsp1 || gnnsp2)) {
5452: MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5453: }
5454: }
5456: /* compute prefixes */
5457: PetscStrcpy(dir_prefix,"");
5458: PetscStrcpy(neu_prefix,"");
5459: if (!pcbddc->current_level) {
5460: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5461: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5462: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5463: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5464: } else {
5465: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5466: PetscStrlen(((PetscObject)pc)->prefix,&len);
5467: len -= 15; /* remove "pc_bddc_coarse_" */
5468: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5469: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5470: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5471: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5472: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5473: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5474: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5475: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5476: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5477: }
5479: /* DIRICHLET PROBLEM */
5480: if (dirichlet) {
5481: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5482: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5483: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5484: if (pcbddc->dbg_flag) {
5485: Mat A_IIn;
5487: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5488: MatDestroy(&pcis->A_II);
5489: pcis->A_II = A_IIn;
5490: }
5491: }
5492: if (pcbddc->local_mat->symmetric_set) {
5493: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5494: }
5495: /* Matrix for Dirichlet problem is pcis->A_II */
5496: n_D = pcis->n - pcis->n_B;
5497: opts = PETSC_FALSE;
5498: if (!pcbddc->ksp_D) { /* create object if not yet build */
5499: opts = PETSC_TRUE;
5500: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5501: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5502: /* default */
5503: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5504: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5505: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5506: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5507: if (issbaij) {
5508: PCSetType(pc_temp,PCCHOLESKY);
5509: } else {
5510: PCSetType(pc_temp,PCLU);
5511: }
5512: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5513: }
5514: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5515: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5516: /* Allow user's customization */
5517: if (opts) {
5518: KSPSetFromOptions(pcbddc->ksp_D);
5519: }
5520: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5521: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5522: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5523: }
5524: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5525: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5526: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5527: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5528: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5529: const PetscInt *idxs;
5530: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5532: ISGetLocalSize(pcis->is_I_local,&nl);
5533: ISGetIndices(pcis->is_I_local,&idxs);
5534: PetscMalloc1(nl*cdim,&scoords);
5535: for (i=0;i<nl;i++) {
5536: for (d=0;d<cdim;d++) {
5537: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5538: }
5539: }
5540: ISRestoreIndices(pcis->is_I_local,&idxs);
5541: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5542: PetscFree(scoords);
5543: }
5544: if (sub_schurs && sub_schurs->reuse_solver) {
5545: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5547: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5548: }
5550: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5551: if (!n_D) {
5552: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5553: PCSetType(pc_temp,PCNONE);
5554: }
5555: KSPSetUp(pcbddc->ksp_D);
5556: /* set ksp_D into pcis data */
5557: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5558: KSPDestroy(&pcis->ksp_D);
5559: pcis->ksp_D = pcbddc->ksp_D;
5560: }
5562: /* NEUMANN PROBLEM */
5563: A_RR = 0;
5564: if (neumann) {
5565: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5566: PetscInt ibs,mbs;
5567: PetscBool issbaij, reuse_neumann_solver;
5568: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5570: reuse_neumann_solver = PETSC_FALSE;
5571: if (sub_schurs && sub_schurs->reuse_solver) {
5572: IS iP;
5574: reuse_neumann_solver = PETSC_TRUE;
5575: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5576: if (iP) reuse_neumann_solver = PETSC_FALSE;
5577: }
5578: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5579: ISGetSize(pcbddc->is_R_local,&n_R);
5580: if (pcbddc->ksp_R) { /* already created ksp */
5581: PetscInt nn_R;
5582: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5583: PetscObjectReference((PetscObject)A_RR);
5584: MatGetSize(A_RR,&nn_R,NULL);
5585: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5586: KSPReset(pcbddc->ksp_R);
5587: MatDestroy(&A_RR);
5588: reuse = MAT_INITIAL_MATRIX;
5589: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5590: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5591: MatDestroy(&A_RR);
5592: reuse = MAT_INITIAL_MATRIX;
5593: } else { /* safe to reuse the matrix */
5594: reuse = MAT_REUSE_MATRIX;
5595: }
5596: }
5597: /* last check */
5598: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5599: MatDestroy(&A_RR);
5600: reuse = MAT_INITIAL_MATRIX;
5601: }
5602: } else { /* first time, so we need to create the matrix */
5603: reuse = MAT_INITIAL_MATRIX;
5604: }
5605: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5606: TODO: Get Rid of these conversions */
5607: MatGetBlockSize(pcbddc->local_mat,&mbs);
5608: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5609: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5610: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5611: if (matis->A == pcbddc->local_mat) {
5612: MatDestroy(&pcbddc->local_mat);
5613: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5614: } else {
5615: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5616: }
5617: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5618: if (matis->A == pcbddc->local_mat) {
5619: MatDestroy(&pcbddc->local_mat);
5620: MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5621: } else {
5622: MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5623: }
5624: }
5625: /* extract A_RR */
5626: if (reuse_neumann_solver) {
5627: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5629: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5630: MatDestroy(&A_RR);
5631: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5632: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5633: } else {
5634: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5635: }
5636: } else {
5637: MatDestroy(&A_RR);
5638: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5639: PetscObjectReference((PetscObject)A_RR);
5640: }
5641: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5642: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5643: }
5644: if (pcbddc->local_mat->symmetric_set) {
5645: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5646: }
5647: opts = PETSC_FALSE;
5648: if (!pcbddc->ksp_R) { /* create object if not present */
5649: opts = PETSC_TRUE;
5650: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5651: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5652: /* default */
5653: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5654: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5655: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5656: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5657: if (issbaij) {
5658: PCSetType(pc_temp,PCCHOLESKY);
5659: } else {
5660: PCSetType(pc_temp,PCLU);
5661: }
5662: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5663: }
5664: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5665: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5666: if (opts) { /* Allow user's customization once */
5667: KSPSetFromOptions(pcbddc->ksp_R);
5668: }
5669: MatGetNearNullSpace(A_RR,&nnsp);
5670: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5671: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5672: }
5673: MatGetNearNullSpace(A_RR,&nnsp);
5674: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5675: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5676: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5677: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5678: const PetscInt *idxs;
5679: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5681: ISGetLocalSize(pcbddc->is_R_local,&nl);
5682: ISGetIndices(pcbddc->is_R_local,&idxs);
5683: PetscMalloc1(nl*cdim,&scoords);
5684: for (i=0;i<nl;i++) {
5685: for (d=0;d<cdim;d++) {
5686: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5687: }
5688: }
5689: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5690: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5691: PetscFree(scoords);
5692: }
5694: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5695: if (!n_R) {
5696: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5697: PCSetType(pc_temp,PCNONE);
5698: }
5699: /* Reuse solver if it is present */
5700: if (reuse_neumann_solver) {
5701: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5703: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5704: }
5705: KSPSetUp(pcbddc->ksp_R);
5706: }
5708: if (pcbddc->dbg_flag) {
5709: PetscViewerFlush(pcbddc->dbg_viewer);
5710: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5711: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5712: }
5713: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5715: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5716: if (pcbddc->NullSpace_corr[0]) {
5717: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5718: }
5719: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5720: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5721: }
5722: if (neumann && pcbddc->NullSpace_corr[2]) {
5723: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5724: }
5725: /* check Dirichlet and Neumann solvers */
5726: if (pcbddc->dbg_flag) {
5727: if (dirichlet) { /* Dirichlet */
5728: VecSetRandom(pcis->vec1_D,NULL);
5729: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5730: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5731: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5732: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5733: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5734: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5735: PetscViewerFlush(pcbddc->dbg_viewer);
5736: }
5737: if (neumann) { /* Neumann */
5738: VecSetRandom(pcbddc->vec1_R,NULL);
5739: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5740: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5741: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5742: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5743: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5744: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5745: PetscViewerFlush(pcbddc->dbg_viewer);
5746: }
5747: }
5748: /* free Neumann problem's matrix */
5749: MatDestroy(&A_RR);
5750: return(0);
5751: }
5753: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5754: {
5755: PetscErrorCode ierr;
5756: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5757: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5758: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
5761: if (!reuse_solver) {
5762: VecSet(pcbddc->vec1_R,0.);
5763: }
5764: if (!pcbddc->switch_static) {
5765: if (applytranspose && pcbddc->local_auxmat1) {
5766: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5767: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5768: }
5769: if (!reuse_solver) {
5770: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5771: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5772: } else {
5773: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5775: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5776: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5777: }
5778: } else {
5779: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5780: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5781: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5782: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5783: if (applytranspose && pcbddc->local_auxmat1) {
5784: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5785: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5786: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5787: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5788: }
5789: }
5790: if (!reuse_solver || pcbddc->switch_static) {
5791: if (applytranspose) {
5792: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5793: } else {
5794: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5795: }
5796: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5797: } else {
5798: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5800: if (applytranspose) {
5801: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5802: } else {
5803: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5804: }
5805: }
5806: VecSet(inout_B,0.);
5807: if (!pcbddc->switch_static) {
5808: if (!reuse_solver) {
5809: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5810: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5811: } else {
5812: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5814: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5815: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5816: }
5817: if (!applytranspose && pcbddc->local_auxmat1) {
5818: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5819: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5820: }
5821: } else {
5822: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5823: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5824: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5825: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5826: if (!applytranspose && pcbddc->local_auxmat1) {
5827: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5828: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5829: }
5830: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5831: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5832: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5833: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5834: }
5835: return(0);
5836: }
5838: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5839: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5840: {
5842: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5843: PC_IS* pcis = (PC_IS*) (pc->data);
5844: const PetscScalar zero = 0.0;
5847: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5848: if (!pcbddc->benign_apply_coarse_only) {
5849: if (applytranspose) {
5850: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5851: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5852: } else {
5853: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5854: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5855: }
5856: } else {
5857: VecSet(pcbddc->vec1_P,zero);
5858: }
5860: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5861: if (pcbddc->benign_n) {
5862: PetscScalar *array;
5863: PetscInt j;
5865: VecGetArray(pcbddc->vec1_P,&array);
5866: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5867: VecRestoreArray(pcbddc->vec1_P,&array);
5868: }
5870: /* start communications from local primal nodes to rhs of coarse solver */
5871: VecSet(pcbddc->coarse_vec,zero);
5872: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5873: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5875: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5876: if (pcbddc->coarse_ksp) {
5877: Mat coarse_mat;
5878: Vec rhs,sol;
5879: MatNullSpace nullsp;
5880: PetscBool isbddc = PETSC_FALSE;
5882: if (pcbddc->benign_have_null) {
5883: PC coarse_pc;
5885: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5886: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5887: /* we need to propagate to coarser levels the need for a possible benign correction */
5888: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5889: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5890: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5891: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5892: }
5893: }
5894: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5895: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5896: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5897: if (applytranspose) {
5898: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5899: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5900: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5901: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5902: if (nullsp) {
5903: MatNullSpaceRemove(nullsp,sol);
5904: }
5905: } else {
5906: MatGetNullSpace(coarse_mat,&nullsp);
5907: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5908: PC coarse_pc;
5910: if (nullsp) {
5911: MatNullSpaceRemove(nullsp,rhs);
5912: }
5913: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5914: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5915: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5916: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5917: } else {
5918: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5919: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5920: if (nullsp) {
5921: MatNullSpaceRemove(nullsp,sol);
5922: }
5923: }
5924: }
5925: /* we don't need the benign correction at coarser levels anymore */
5926: if (pcbddc->benign_have_null && isbddc) {
5927: PC coarse_pc;
5928: PC_BDDC* coarsepcbddc;
5930: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5931: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5932: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5933: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5934: }
5935: }
5937: /* Local solution on R nodes */
5938: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5939: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5940: }
5941: /* communications from coarse sol to local primal nodes */
5942: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5943: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5945: /* Sum contributions from the two levels */
5946: if (!pcbddc->benign_apply_coarse_only) {
5947: if (applytranspose) {
5948: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5949: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5950: } else {
5951: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5952: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5953: }
5954: /* store p0 */
5955: if (pcbddc->benign_n) {
5956: PetscScalar *array;
5957: PetscInt j;
5959: VecGetArray(pcbddc->vec1_P,&array);
5960: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5961: VecRestoreArray(pcbddc->vec1_P,&array);
5962: }
5963: } else { /* expand the coarse solution */
5964: if (applytranspose) {
5965: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5966: } else {
5967: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5968: }
5969: }
5970: return(0);
5971: }
5973: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5974: {
5975: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5976: Vec from,to;
5977: const PetscScalar *array;
5978: PetscErrorCode ierr;
5981: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5982: from = pcbddc->coarse_vec;
5983: to = pcbddc->vec1_P;
5984: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5985: Vec tvec;
5987: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5988: VecResetArray(tvec);
5989: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5990: VecGetArrayRead(tvec,&array);
5991: VecPlaceArray(from,array);
5992: VecRestoreArrayRead(tvec,&array);
5993: }
5994: } else { /* from local to global -> put data in coarse right hand side */
5995: from = pcbddc->vec1_P;
5996: to = pcbddc->coarse_vec;
5997: }
5998: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5999: return(0);
6000: }
6002: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6003: {
6004: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
6005: Vec from,to;
6006: const PetscScalar *array;
6007: PetscErrorCode ierr;
6010: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6011: from = pcbddc->coarse_vec;
6012: to = pcbddc->vec1_P;
6013: } else { /* from local to global -> put data in coarse right hand side */
6014: from = pcbddc->vec1_P;
6015: to = pcbddc->coarse_vec;
6016: }
6017: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6018: if (smode == SCATTER_FORWARD) {
6019: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6020: Vec tvec;
6022: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6023: VecGetArrayRead(to,&array);
6024: VecPlaceArray(tvec,array);
6025: VecRestoreArrayRead(to,&array);
6026: }
6027: } else {
6028: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6029: VecResetArray(from);
6030: }
6031: }
6032: return(0);
6033: }
6035: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6036: {
6037: PetscErrorCode ierr;
6038: PC_IS* pcis = (PC_IS*)(pc->data);
6039: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
6040: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
6041: /* one and zero */
6042: PetscScalar one=1.0,zero=0.0;
6043: /* space to store constraints and their local indices */
6044: PetscScalar *constraints_data;
6045: PetscInt *constraints_idxs,*constraints_idxs_B;
6046: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
6047: PetscInt *constraints_n;
6048: /* iterators */
6049: PetscInt i,j,k,total_counts,total_counts_cc,cum;
6050: /* BLAS integers */
6051: PetscBLASInt lwork,lierr;
6052: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
6053: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
6054: /* reuse */
6055: PetscInt olocal_primal_size,olocal_primal_size_cc;
6056: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
6057: /* change of basis */
6058: PetscBool qr_needed;
6059: PetscBT change_basis,qr_needed_idx;
6060: /* auxiliary stuff */
6061: PetscInt *nnz,*is_indices;
6062: PetscInt ncc;
6063: /* some quantities */
6064: PetscInt n_vertices,total_primal_vertices,valid_constraints;
6065: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6066: PetscReal tol; /* tolerance for retaining eigenmodes */
6069: tol = PetscSqrtReal(PETSC_SMALL);
6070: /* Destroy Mat objects computed previously */
6071: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6072: MatDestroy(&pcbddc->ConstraintMatrix);
6073: MatDestroy(&pcbddc->switch_static_change);
6074: /* save info on constraints from previous setup (if any) */
6075: olocal_primal_size = pcbddc->local_primal_size;
6076: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6077: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6078: PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6079: PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6080: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6081: PetscFree(pcbddc->primal_indices_local_idxs);
6083: if (!pcbddc->adaptive_selection) {
6084: IS ISForVertices,*ISForFaces,*ISForEdges;
6085: MatNullSpace nearnullsp;
6086: const Vec *nearnullvecs;
6087: Vec *localnearnullsp;
6088: PetscScalar *array;
6089: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
6090: PetscBool nnsp_has_cnst;
6091: /* LAPACK working arrays for SVD or POD */
6092: PetscBool skip_lapack,boolforchange;
6093: PetscScalar *work;
6094: PetscReal *singular_vals;
6095: #if defined(PETSC_USE_COMPLEX)
6096: PetscReal *rwork;
6097: #endif
6098: PetscScalar *temp_basis = NULL,*correlation_mat = NULL;
6099: PetscBLASInt dummy_int=1;
6100: PetscScalar dummy_scalar=1.;
6101: PetscBool use_pod = PETSC_FALSE;
6103: /* MKL SVD with same input gives different results on different processes! */
6104: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL)
6105: use_pod = PETSC_TRUE;
6106: #endif
6107: /* Get index sets for faces, edges and vertices from graph */
6108: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6109: /* print some info */
6110: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6111: PetscInt nv;
6113: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6114: ISGetSize(ISForVertices,&nv);
6115: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6116: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6117: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6118: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6119: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6120: PetscViewerFlush(pcbddc->dbg_viewer);
6121: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6122: }
6124: /* free unneeded index sets */
6125: if (!pcbddc->use_vertices) {
6126: ISDestroy(&ISForVertices);
6127: }
6128: if (!pcbddc->use_edges) {
6129: for (i=0;i<n_ISForEdges;i++) {
6130: ISDestroy(&ISForEdges[i]);
6131: }
6132: PetscFree(ISForEdges);
6133: n_ISForEdges = 0;
6134: }
6135: if (!pcbddc->use_faces) {
6136: for (i=0;i<n_ISForFaces;i++) {
6137: ISDestroy(&ISForFaces[i]);
6138: }
6139: PetscFree(ISForFaces);
6140: n_ISForFaces = 0;
6141: }
6143: /* check if near null space is attached to global mat */
6144: if (pcbddc->use_nnsp) {
6145: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6146: } else nearnullsp = NULL;
6148: if (nearnullsp) {
6149: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6150: /* remove any stored info */
6151: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6152: PetscFree(pcbddc->onearnullvecs_state);
6153: /* store information for BDDC solver reuse */
6154: PetscObjectReference((PetscObject)nearnullsp);
6155: pcbddc->onearnullspace = nearnullsp;
6156: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6157: for (i=0;i<nnsp_size;i++) {
6158: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6159: }
6160: } else { /* if near null space is not provided BDDC uses constants by default */
6161: nnsp_size = 0;
6162: nnsp_has_cnst = PETSC_TRUE;
6163: }
6164: /* get max number of constraints on a single cc */
6165: max_constraints = nnsp_size;
6166: if (nnsp_has_cnst) max_constraints++;
6168: /*
6169: Evaluate maximum storage size needed by the procedure
6170: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6171: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6172: There can be multiple constraints per connected component
6173: */
6174: n_vertices = 0;
6175: if (ISForVertices) {
6176: ISGetSize(ISForVertices,&n_vertices);
6177: }
6178: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6179: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6181: total_counts = n_ISForFaces+n_ISForEdges;
6182: total_counts *= max_constraints;
6183: total_counts += n_vertices;
6184: PetscBTCreate(total_counts,&change_basis);
6186: total_counts = 0;
6187: max_size_of_constraint = 0;
6188: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6189: IS used_is;
6190: if (i<n_ISForEdges) {
6191: used_is = ISForEdges[i];
6192: } else {
6193: used_is = ISForFaces[i-n_ISForEdges];
6194: }
6195: ISGetSize(used_is,&j);
6196: total_counts += j;
6197: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6198: }
6199: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6201: /* get local part of global near null space vectors */
6202: PetscMalloc1(nnsp_size,&localnearnullsp);
6203: for (k=0;k<nnsp_size;k++) {
6204: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6205: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6206: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6207: }
6209: /* whether or not to skip lapack calls */
6210: skip_lapack = PETSC_TRUE;
6211: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6213: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6214: if (!skip_lapack) {
6215: PetscScalar temp_work;
6217: if (use_pod) {
6218: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6219: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6220: PetscMalloc1(max_constraints,&singular_vals);
6221: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6222: #if defined(PETSC_USE_COMPLEX)
6223: PetscMalloc1(3*max_constraints,&rwork);
6224: #endif
6225: /* now we evaluate the optimal workspace using query with lwork=-1 */
6226: PetscBLASIntCast(max_constraints,&Blas_N);
6227: PetscBLASIntCast(max_constraints,&Blas_LDA);
6228: lwork = -1;
6229: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6230: #if !defined(PETSC_USE_COMPLEX)
6231: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6232: #else
6233: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6234: #endif
6235: PetscFPTrapPop();
6236: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6237: } else {
6238: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6239: /* SVD */
6240: PetscInt max_n,min_n;
6241: max_n = max_size_of_constraint;
6242: min_n = max_constraints;
6243: if (max_size_of_constraint < max_constraints) {
6244: min_n = max_size_of_constraint;
6245: max_n = max_constraints;
6246: }
6247: PetscMalloc1(min_n,&singular_vals);
6248: #if defined(PETSC_USE_COMPLEX)
6249: PetscMalloc1(5*min_n,&rwork);
6250: #endif
6251: /* now we evaluate the optimal workspace using query with lwork=-1 */
6252: lwork = -1;
6253: PetscBLASIntCast(max_n,&Blas_M);
6254: PetscBLASIntCast(min_n,&Blas_N);
6255: PetscBLASIntCast(max_n,&Blas_LDA);
6256: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6257: #if !defined(PETSC_USE_COMPLEX)
6258: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6259: #else
6260: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6261: #endif
6262: PetscFPTrapPop();
6263: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6264: #else
6265: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6266: #endif /* on missing GESVD */
6267: }
6268: /* Allocate optimal workspace */
6269: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6270: PetscMalloc1(lwork,&work);
6271: }
6272: /* Now we can loop on constraining sets */
6273: total_counts = 0;
6274: constraints_idxs_ptr[0] = 0;
6275: constraints_data_ptr[0] = 0;
6276: /* vertices */
6277: if (n_vertices) {
6278: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6279: PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6280: for (i=0;i<n_vertices;i++) {
6281: constraints_n[total_counts] = 1;
6282: constraints_data[total_counts] = 1.0;
6283: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6284: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6285: total_counts++;
6286: }
6287: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6288: n_vertices = total_counts;
6289: }
6291: /* edges and faces */
6292: total_counts_cc = total_counts;
6293: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6294: IS used_is;
6295: PetscBool idxs_copied = PETSC_FALSE;
6297: if (ncc<n_ISForEdges) {
6298: used_is = ISForEdges[ncc];
6299: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6300: } else {
6301: used_is = ISForFaces[ncc-n_ISForEdges];
6302: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6303: }
6304: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6306: ISGetSize(used_is,&size_of_constraint);
6307: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6308: /* change of basis should not be performed on local periodic nodes */
6309: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6310: if (nnsp_has_cnst) {
6311: PetscScalar quad_value;
6313: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6314: idxs_copied = PETSC_TRUE;
6316: if (!pcbddc->use_nnsp_true) {
6317: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6318: } else {
6319: quad_value = 1.0;
6320: }
6321: for (j=0;j<size_of_constraint;j++) {
6322: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6323: }
6324: temp_constraints++;
6325: total_counts++;
6326: }
6327: for (k=0;k<nnsp_size;k++) {
6328: PetscReal real_value;
6329: PetscScalar *ptr_to_data;
6331: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6332: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6333: for (j=0;j<size_of_constraint;j++) {
6334: ptr_to_data[j] = array[is_indices[j]];
6335: }
6336: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6337: /* check if array is null on the connected component */
6338: PetscBLASIntCast(size_of_constraint,&Blas_N);
6339: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6340: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6341: temp_constraints++;
6342: total_counts++;
6343: if (!idxs_copied) {
6344: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6345: idxs_copied = PETSC_TRUE;
6346: }
6347: }
6348: }
6349: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6350: valid_constraints = temp_constraints;
6351: if (!pcbddc->use_nnsp_true && temp_constraints) {
6352: if (temp_constraints == 1) { /* just normalize the constraint */
6353: PetscScalar norm,*ptr_to_data;
6355: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6356: PetscBLASIntCast(size_of_constraint,&Blas_N);
6357: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6358: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6359: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6360: } else { /* perform SVD */
6361: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6363: if (use_pod) {
6364: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6365: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6366: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6367: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6368: from that computed using LAPACKgesvd
6369: -> This is due to a different computation of eigenvectors in LAPACKheev
6370: -> The quality of the POD-computed basis will be the same */
6371: PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6372: /* Store upper triangular part of correlation matrix */
6373: PetscBLASIntCast(size_of_constraint,&Blas_N);
6374: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6375: for (j=0;j<temp_constraints;j++) {
6376: for (k=0;k<j+1;k++) {
6377: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6378: }
6379: }
6380: /* compute eigenvalues and eigenvectors of correlation matrix */
6381: PetscBLASIntCast(temp_constraints,&Blas_N);
6382: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6383: #if !defined(PETSC_USE_COMPLEX)
6384: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6385: #else
6386: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6387: #endif
6388: PetscFPTrapPop();
6389: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6390: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6391: j = 0;
6392: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6393: total_counts = total_counts-j;
6394: valid_constraints = temp_constraints-j;
6395: /* scale and copy POD basis into used quadrature memory */
6396: PetscBLASIntCast(size_of_constraint,&Blas_M);
6397: PetscBLASIntCast(temp_constraints,&Blas_N);
6398: PetscBLASIntCast(temp_constraints,&Blas_K);
6399: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6400: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6401: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6402: if (j<temp_constraints) {
6403: PetscInt ii;
6404: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6405: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6406: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6407: PetscFPTrapPop();
6408: for (k=0;k<temp_constraints-j;k++) {
6409: for (ii=0;ii<size_of_constraint;ii++) {
6410: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6411: }
6412: }
6413: }
6414: } else {
6415: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6416: PetscBLASIntCast(size_of_constraint,&Blas_M);
6417: PetscBLASIntCast(temp_constraints,&Blas_N);
6418: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6419: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6420: #if !defined(PETSC_USE_COMPLEX)
6421: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6422: #else
6423: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6424: #endif
6425: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6426: PetscFPTrapPop();
6427: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6428: k = temp_constraints;
6429: if (k > size_of_constraint) k = size_of_constraint;
6430: j = 0;
6431: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6432: valid_constraints = k-j;
6433: total_counts = total_counts-temp_constraints+valid_constraints;
6434: #else
6435: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6436: #endif /* on missing GESVD */
6437: }
6438: }
6439: }
6440: /* update pointers information */
6441: if (valid_constraints) {
6442: constraints_n[total_counts_cc] = valid_constraints;
6443: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6444: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6445: /* set change_of_basis flag */
6446: if (boolforchange) {
6447: PetscBTSet(change_basis,total_counts_cc);
6448: }
6449: total_counts_cc++;
6450: }
6451: }
6452: /* free workspace */
6453: if (!skip_lapack) {
6454: PetscFree(work);
6455: #if defined(PETSC_USE_COMPLEX)
6456: PetscFree(rwork);
6457: #endif
6458: PetscFree(singular_vals);
6459: PetscFree(correlation_mat);
6460: PetscFree(temp_basis);
6461: }
6462: for (k=0;k<nnsp_size;k++) {
6463: VecDestroy(&localnearnullsp[k]);
6464: }
6465: PetscFree(localnearnullsp);
6466: /* free index sets of faces, edges and vertices */
6467: for (i=0;i<n_ISForFaces;i++) {
6468: ISDestroy(&ISForFaces[i]);
6469: }
6470: if (n_ISForFaces) {
6471: PetscFree(ISForFaces);
6472: }
6473: for (i=0;i<n_ISForEdges;i++) {
6474: ISDestroy(&ISForEdges[i]);
6475: }
6476: if (n_ISForEdges) {
6477: PetscFree(ISForEdges);
6478: }
6479: ISDestroy(&ISForVertices);
6480: } else {
6481: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6483: total_counts = 0;
6484: n_vertices = 0;
6485: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6486: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6487: }
6488: max_constraints = 0;
6489: total_counts_cc = 0;
6490: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6491: total_counts += pcbddc->adaptive_constraints_n[i];
6492: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6493: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6494: }
6495: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6496: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6497: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6498: constraints_data = pcbddc->adaptive_constraints_data;
6499: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6500: PetscMalloc1(total_counts_cc,&constraints_n);
6501: total_counts_cc = 0;
6502: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6503: if (pcbddc->adaptive_constraints_n[i]) {
6504: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6505: }
6506: }
6508: max_size_of_constraint = 0;
6509: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6510: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6511: /* Change of basis */
6512: PetscBTCreate(total_counts_cc,&change_basis);
6513: if (pcbddc->use_change_of_basis) {
6514: for (i=0;i<sub_schurs->n_subs;i++) {
6515: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6516: PetscBTSet(change_basis,i+n_vertices);
6517: }
6518: }
6519: }
6520: }
6521: pcbddc->local_primal_size = total_counts;
6522: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6524: /* map constraints_idxs in boundary numbering */
6525: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6526: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6528: /* Create constraint matrix */
6529: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6530: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6531: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6533: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6534: /* determine if a QR strategy is needed for change of basis */
6535: qr_needed = pcbddc->use_qr_single;
6536: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6537: total_primal_vertices=0;
6538: pcbddc->local_primal_size_cc = 0;
6539: for (i=0;i<total_counts_cc;i++) {
6540: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6541: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6542: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6543: pcbddc->local_primal_size_cc += 1;
6544: } else if (PetscBTLookup(change_basis,i)) {
6545: for (k=0;k<constraints_n[i];k++) {
6546: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6547: }
6548: pcbddc->local_primal_size_cc += constraints_n[i];
6549: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6550: PetscBTSet(qr_needed_idx,i);
6551: qr_needed = PETSC_TRUE;
6552: }
6553: } else {
6554: pcbddc->local_primal_size_cc += 1;
6555: }
6556: }
6557: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6558: pcbddc->n_vertices = total_primal_vertices;
6559: /* permute indices in order to have a sorted set of vertices */
6560: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6561: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6562: PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6563: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6565: /* nonzero structure of constraint matrix */
6566: /* and get reference dof for local constraints */
6567: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6568: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6570: j = total_primal_vertices;
6571: total_counts = total_primal_vertices;
6572: cum = total_primal_vertices;
6573: for (i=n_vertices;i<total_counts_cc;i++) {
6574: if (!PetscBTLookup(change_basis,i)) {
6575: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6576: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6577: cum++;
6578: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6579: for (k=0;k<constraints_n[i];k++) {
6580: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6581: nnz[j+k] = size_of_constraint;
6582: }
6583: j += constraints_n[i];
6584: }
6585: }
6586: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6587: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6588: PetscFree(nnz);
6590: /* set values in constraint matrix */
6591: for (i=0;i<total_primal_vertices;i++) {
6592: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6593: }
6594: total_counts = total_primal_vertices;
6595: for (i=n_vertices;i<total_counts_cc;i++) {
6596: if (!PetscBTLookup(change_basis,i)) {
6597: PetscInt *cols;
6599: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6600: cols = constraints_idxs+constraints_idxs_ptr[i];
6601: for (k=0;k<constraints_n[i];k++) {
6602: PetscInt row = total_counts+k;
6603: PetscScalar *vals;
6605: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6606: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6607: }
6608: total_counts += constraints_n[i];
6609: }
6610: }
6611: /* assembling */
6612: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6613: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6614: MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");
6616: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6617: if (pcbddc->use_change_of_basis) {
6618: /* dual and primal dofs on a single cc */
6619: PetscInt dual_dofs,primal_dofs;
6620: /* working stuff for GEQRF */
6621: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6622: PetscBLASInt lqr_work;
6623: /* working stuff for UNGQR */
6624: PetscScalar *gqr_work = NULL,lgqr_work_t=0.0;
6625: PetscBLASInt lgqr_work;
6626: /* working stuff for TRTRS */
6627: PetscScalar *trs_rhs = NULL;
6628: PetscBLASInt Blas_NRHS;
6629: /* pointers for values insertion into change of basis matrix */
6630: PetscInt *start_rows,*start_cols;
6631: PetscScalar *start_vals;
6632: /* working stuff for values insertion */
6633: PetscBT is_primal;
6634: PetscInt *aux_primal_numbering_B;
6635: /* matrix sizes */
6636: PetscInt global_size,local_size;
6637: /* temporary change of basis */
6638: Mat localChangeOfBasisMatrix;
6639: /* extra space for debugging */
6640: PetscScalar *dbg_work = NULL;
6642: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6643: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6644: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6645: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6646: /* nonzeros for local mat */
6647: PetscMalloc1(pcis->n,&nnz);
6648: if (!pcbddc->benign_change || pcbddc->fake_change) {
6649: for (i=0;i<pcis->n;i++) nnz[i]=1;
6650: } else {
6651: const PetscInt *ii;
6652: PetscInt n;
6653: PetscBool flg_row;
6654: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6655: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6656: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6657: }
6658: for (i=n_vertices;i<total_counts_cc;i++) {
6659: if (PetscBTLookup(change_basis,i)) {
6660: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6661: if (PetscBTLookup(qr_needed_idx,i)) {
6662: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6663: } else {
6664: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6665: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6666: }
6667: }
6668: }
6669: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6670: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6671: PetscFree(nnz);
6672: /* Set interior change in the matrix */
6673: if (!pcbddc->benign_change || pcbddc->fake_change) {
6674: for (i=0;i<pcis->n;i++) {
6675: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6676: }
6677: } else {
6678: const PetscInt *ii,*jj;
6679: PetscScalar *aa;
6680: PetscInt n;
6681: PetscBool flg_row;
6682: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6683: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6684: for (i=0;i<n;i++) {
6685: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6686: }
6687: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6688: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6689: }
6691: if (pcbddc->dbg_flag) {
6692: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6693: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6694: }
6697: /* Now we loop on the constraints which need a change of basis */
6698: /*
6699: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6700: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6702: Basic blocks of change of basis matrix T computed by
6704: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6706: | 1 0 ... 0 s_1/S |
6707: | 0 1 ... 0 s_2/S |
6708: | ... |
6709: | 0 ... 1 s_{n-1}/S |
6710: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6712: with S = \sum_{i=1}^n s_i^2
6713: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6714: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6716: - QR decomposition of constraints otherwise
6717: */
6718: if (qr_needed && max_size_of_constraint) {
6719: /* space to store Q */
6720: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6721: /* array to store scaling factors for reflectors */
6722: PetscMalloc1(max_constraints,&qr_tau);
6723: /* first we issue queries for optimal work */
6724: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6725: PetscBLASIntCast(max_constraints,&Blas_N);
6726: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6727: lqr_work = -1;
6728: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6729: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6730: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6731: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6732: lgqr_work = -1;
6733: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6734: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6735: PetscBLASIntCast(max_constraints,&Blas_K);
6736: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6737: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6738: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6739: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6740: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6741: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6742: /* array to store rhs and solution of triangular solver */
6743: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6744: /* allocating workspace for check */
6745: if (pcbddc->dbg_flag) {
6746: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6747: }
6748: }
6749: /* array to store whether a node is primal or not */
6750: PetscBTCreate(pcis->n_B,&is_primal);
6751: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6752: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6753: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6754: for (i=0;i<total_primal_vertices;i++) {
6755: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6756: }
6757: PetscFree(aux_primal_numbering_B);
6759: /* loop on constraints and see whether or not they need a change of basis and compute it */
6760: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6761: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6762: if (PetscBTLookup(change_basis,total_counts)) {
6763: /* get constraint info */
6764: primal_dofs = constraints_n[total_counts];
6765: dual_dofs = size_of_constraint-primal_dofs;
6767: if (pcbddc->dbg_flag) {
6768: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6769: }
6771: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6773: /* copy quadrature constraints for change of basis check */
6774: if (pcbddc->dbg_flag) {
6775: PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6776: }
6777: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6778: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6780: /* compute QR decomposition of constraints */
6781: PetscBLASIntCast(size_of_constraint,&Blas_M);
6782: PetscBLASIntCast(primal_dofs,&Blas_N);
6783: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6784: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6785: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6786: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6787: PetscFPTrapPop();
6789: /* explictly compute R^-T */
6790: PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6791: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6792: PetscBLASIntCast(primal_dofs,&Blas_N);
6793: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6794: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6795: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6796: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6797: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6798: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6799: PetscFPTrapPop();
6801: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6802: PetscBLASIntCast(size_of_constraint,&Blas_M);
6803: PetscBLASIntCast(size_of_constraint,&Blas_N);
6804: PetscBLASIntCast(primal_dofs,&Blas_K);
6805: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6806: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6807: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6808: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6809: PetscFPTrapPop();
6811: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6812: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6813: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6814: PetscBLASIntCast(size_of_constraint,&Blas_M);
6815: PetscBLASIntCast(primal_dofs,&Blas_N);
6816: PetscBLASIntCast(primal_dofs,&Blas_K);
6817: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6818: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6819: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6820: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6821: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6822: PetscFPTrapPop();
6823: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6825: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6826: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6827: /* insert cols for primal dofs */
6828: for (j=0;j<primal_dofs;j++) {
6829: start_vals = &qr_basis[j*size_of_constraint];
6830: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6831: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6832: }
6833: /* insert cols for dual dofs */
6834: for (j=0,k=0;j<dual_dofs;k++) {
6835: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6836: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6837: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6838: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6839: j++;
6840: }
6841: }
6843: /* check change of basis */
6844: if (pcbddc->dbg_flag) {
6845: PetscInt ii,jj;
6846: PetscBool valid_qr=PETSC_TRUE;
6847: PetscBLASIntCast(primal_dofs,&Blas_M);
6848: PetscBLASIntCast(size_of_constraint,&Blas_N);
6849: PetscBLASIntCast(size_of_constraint,&Blas_K);
6850: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6851: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6852: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6853: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6854: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6855: PetscFPTrapPop();
6856: for (jj=0;jj<size_of_constraint;jj++) {
6857: for (ii=0;ii<primal_dofs;ii++) {
6858: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6859: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6860: }
6861: }
6862: if (!valid_qr) {
6863: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6864: for (jj=0;jj<size_of_constraint;jj++) {
6865: for (ii=0;ii<primal_dofs;ii++) {
6866: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6867: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6868: }
6869: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6870: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6871: }
6872: }
6873: }
6874: } else {
6875: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6876: }
6877: }
6878: } else { /* simple transformation block */
6879: PetscInt row,col;
6880: PetscScalar val,norm;
6882: PetscBLASIntCast(size_of_constraint,&Blas_N);
6883: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6884: for (j=0;j<size_of_constraint;j++) {
6885: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6886: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6887: if (!PetscBTLookup(is_primal,row_B)) {
6888: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6889: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6890: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6891: } else {
6892: for (k=0;k<size_of_constraint;k++) {
6893: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6894: if (row != col) {
6895: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6896: } else {
6897: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6898: }
6899: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6900: }
6901: }
6902: }
6903: if (pcbddc->dbg_flag) {
6904: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6905: }
6906: }
6907: } else {
6908: if (pcbddc->dbg_flag) {
6909: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6910: }
6911: }
6912: }
6914: /* free workspace */
6915: if (qr_needed) {
6916: if (pcbddc->dbg_flag) {
6917: PetscFree(dbg_work);
6918: }
6919: PetscFree(trs_rhs);
6920: PetscFree(qr_tau);
6921: PetscFree(qr_work);
6922: PetscFree(gqr_work);
6923: PetscFree(qr_basis);
6924: }
6925: PetscBTDestroy(&is_primal);
6926: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6927: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6929: /* assembling of global change of variable */
6930: if (!pcbddc->fake_change) {
6931: Mat tmat;
6932: PetscInt bs;
6934: VecGetSize(pcis->vec1_global,&global_size);
6935: VecGetLocalSize(pcis->vec1_global,&local_size);
6936: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6937: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6938: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6939: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6940: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6941: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6942: MatGetBlockSize(pc->pmat,&bs);
6943: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6944: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6945: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6946: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6947: MatDestroy(&tmat);
6948: VecSet(pcis->vec1_global,0.0);
6949: VecSet(pcis->vec1_N,1.0);
6950: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6951: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6952: VecReciprocal(pcis->vec1_global);
6953: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6955: /* check */
6956: if (pcbddc->dbg_flag) {
6957: PetscReal error;
6958: Vec x,x_change;
6960: VecDuplicate(pcis->vec1_global,&x);
6961: VecDuplicate(pcis->vec1_global,&x_change);
6962: VecSetRandom(x,NULL);
6963: VecCopy(x,pcis->vec1_global);
6964: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6965: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6966: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6967: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6968: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6969: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6970: VecAXPY(x,-1.0,x_change);
6971: VecNorm(x,NORM_INFINITY,&error);
6972: if (error > PETSC_SMALL) {
6973: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6974: }
6975: VecDestroy(&x);
6976: VecDestroy(&x_change);
6977: }
6978: /* adapt sub_schurs computed (if any) */
6979: if (pcbddc->use_deluxe_scaling) {
6980: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6982: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6983: if (sub_schurs && sub_schurs->S_Ej_all) {
6984: Mat S_new,tmat;
6985: IS is_all_N,is_V_Sall = NULL;
6987: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6988: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6989: if (pcbddc->deluxe_zerorows) {
6990: ISLocalToGlobalMapping NtoSall;
6991: IS is_V;
6992: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6993: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6994: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6995: ISLocalToGlobalMappingDestroy(&NtoSall);
6996: ISDestroy(&is_V);
6997: }
6998: ISDestroy(&is_all_N);
6999: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7000: MatDestroy(&sub_schurs->S_Ej_all);
7001: PetscObjectReference((PetscObject)S_new);
7002: if (pcbddc->deluxe_zerorows) {
7003: const PetscScalar *array;
7004: const PetscInt *idxs_V,*idxs_all;
7005: PetscInt i,n_V;
7007: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7008: ISGetLocalSize(is_V_Sall,&n_V);
7009: ISGetIndices(is_V_Sall,&idxs_V);
7010: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
7011: VecGetArrayRead(pcis->D,&array);
7012: for (i=0;i<n_V;i++) {
7013: PetscScalar val;
7014: PetscInt idx;
7016: idx = idxs_V[i];
7017: val = array[idxs_all[idxs_V[i]]];
7018: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
7019: }
7020: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
7021: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
7022: VecRestoreArrayRead(pcis->D,&array);
7023: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
7024: ISRestoreIndices(is_V_Sall,&idxs_V);
7025: }
7026: sub_schurs->S_Ej_all = S_new;
7027: MatDestroy(&S_new);
7028: if (sub_schurs->sum_S_Ej_all) {
7029: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7030: MatDestroy(&sub_schurs->sum_S_Ej_all);
7031: PetscObjectReference((PetscObject)S_new);
7032: if (pcbddc->deluxe_zerorows) {
7033: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7034: }
7035: sub_schurs->sum_S_Ej_all = S_new;
7036: MatDestroy(&S_new);
7037: }
7038: ISDestroy(&is_V_Sall);
7039: MatDestroy(&tmat);
7040: }
7041: /* destroy any change of basis context in sub_schurs */
7042: if (sub_schurs && sub_schurs->change) {
7043: PetscInt i;
7045: for (i=0;i<sub_schurs->n_subs;i++) {
7046: KSPDestroy(&sub_schurs->change[i]);
7047: }
7048: PetscFree(sub_schurs->change);
7049: }
7050: }
7051: if (pcbddc->switch_static) { /* need to save the local change */
7052: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7053: } else {
7054: MatDestroy(&localChangeOfBasisMatrix);
7055: }
7056: /* determine if any process has changed the pressures locally */
7057: pcbddc->change_interior = pcbddc->benign_have_null;
7058: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7059: MatDestroy(&pcbddc->ConstraintMatrix);
7060: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7061: pcbddc->use_qr_single = qr_needed;
7062: }
7063: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7064: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7065: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7066: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7067: } else {
7068: Mat benign_global = NULL;
7069: if (pcbddc->benign_have_null) {
7070: Mat M;
7072: pcbddc->change_interior = PETSC_TRUE;
7073: VecCopy(matis->counter,pcis->vec1_N);
7074: VecReciprocal(pcis->vec1_N);
7075: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7076: if (pcbddc->benign_change) {
7077: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7078: MatDiagonalScale(M,pcis->vec1_N,NULL);
7079: } else {
7080: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7081: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7082: }
7083: MatISSetLocalMat(benign_global,M);
7084: MatDestroy(&M);
7085: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7086: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7087: }
7088: if (pcbddc->user_ChangeOfBasisMatrix) {
7089: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7090: MatDestroy(&benign_global);
7091: } else if (pcbddc->benign_have_null) {
7092: pcbddc->ChangeOfBasisMatrix = benign_global;
7093: }
7094: }
7095: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7096: IS is_global;
7097: const PetscInt *gidxs;
7099: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
7100: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7101: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7102: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7103: ISDestroy(&is_global);
7104: }
7105: }
7106: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7107: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7108: }
7110: if (!pcbddc->fake_change) {
7111: /* add pressure dofs to set of primal nodes for numbering purposes */
7112: for (i=0;i<pcbddc->benign_n;i++) {
7113: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7114: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7115: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7116: pcbddc->local_primal_size_cc++;
7117: pcbddc->local_primal_size++;
7118: }
7120: /* check if a new primal space has been introduced (also take into account benign trick) */
7121: pcbddc->new_primal_space_local = PETSC_TRUE;
7122: if (olocal_primal_size == pcbddc->local_primal_size) {
7123: PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7124: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7125: if (!pcbddc->new_primal_space_local) {
7126: PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7127: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7128: }
7129: }
7130: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7131: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7132: }
7133: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7135: /* flush dbg viewer */
7136: if (pcbddc->dbg_flag) {
7137: PetscViewerFlush(pcbddc->dbg_viewer);
7138: }
7140: /* free workspace */
7141: PetscBTDestroy(&qr_needed_idx);
7142: PetscBTDestroy(&change_basis);
7143: if (!pcbddc->adaptive_selection) {
7144: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7145: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7146: } else {
7147: PetscFree5(pcbddc->adaptive_constraints_n,
7148: pcbddc->adaptive_constraints_idxs_ptr,
7149: pcbddc->adaptive_constraints_data_ptr,
7150: pcbddc->adaptive_constraints_idxs,
7151: pcbddc->adaptive_constraints_data);
7152: PetscFree(constraints_n);
7153: PetscFree(constraints_idxs_B);
7154: }
7155: return(0);
7156: }
7158: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7159: {
7160: ISLocalToGlobalMapping map;
7161: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7162: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7163: PetscInt i,N;
7164: PetscBool rcsr = PETSC_FALSE;
7165: PetscErrorCode ierr;
7168: if (pcbddc->recompute_topography) {
7169: pcbddc->graphanalyzed = PETSC_FALSE;
7170: /* Reset previously computed graph */
7171: PCBDDCGraphReset(pcbddc->mat_graph);
7172: /* Init local Graph struct */
7173: MatGetSize(pc->pmat,&N,NULL);
7174: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7175: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7177: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7178: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7179: }
7180: /* Check validity of the csr graph passed in by the user */
7181: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7183: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7184: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7185: PetscInt *xadj,*adjncy;
7186: PetscInt nvtxs;
7187: PetscBool flg_row=PETSC_FALSE;
7189: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7190: if (flg_row) {
7191: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7192: pcbddc->computed_rowadj = PETSC_TRUE;
7193: }
7194: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7195: rcsr = PETSC_TRUE;
7196: }
7197: if (pcbddc->dbg_flag) {
7198: PetscViewerFlush(pcbddc->dbg_viewer);
7199: }
7201: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7202: PetscReal *lcoords;
7203: PetscInt n;
7204: MPI_Datatype dimrealtype;
7206: /* TODO: support for blocked */
7207: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7208: MatGetLocalSize(matis->A,&n,NULL);
7209: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7210: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7211: MPI_Type_commit(&dimrealtype);
7212: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7213: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7214: MPI_Type_free(&dimrealtype);
7215: PetscFree(pcbddc->mat_graph->coords);
7217: pcbddc->mat_graph->coords = lcoords;
7218: pcbddc->mat_graph->cloc = PETSC_TRUE;
7219: pcbddc->mat_graph->cnloc = n;
7220: }
7221: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7222: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
7224: /* Setup of Graph */
7225: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7226: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7228: /* attach info on disconnected subdomains if present */
7229: if (pcbddc->n_local_subs) {
7230: PetscInt *local_subs,n,totn;
7232: MatGetLocalSize(matis->A,&n,NULL);
7233: PetscMalloc1(n,&local_subs);
7234: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7235: for (i=0;i<pcbddc->n_local_subs;i++) {
7236: const PetscInt *idxs;
7237: PetscInt nl,j;
7239: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7240: ISGetIndices(pcbddc->local_subs[i],&idxs);
7241: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7242: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7243: }
7244: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7245: pcbddc->mat_graph->n_local_subs = totn + 1;
7246: pcbddc->mat_graph->local_subs = local_subs;
7247: }
7248: }
7250: if (!pcbddc->graphanalyzed) {
7251: /* Graph's connected components analysis */
7252: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7253: pcbddc->graphanalyzed = PETSC_TRUE;
7254: pcbddc->corner_selected = pcbddc->corner_selection;
7255: }
7256: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7257: return(0);
7258: }
7260: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7261: {
7262: PetscInt i,j,n;
7263: PetscScalar *alphas;
7264: PetscReal norm,*onorms;
7268: n = *nio;
7269: if (!n) return(0);
7270: PetscMalloc2(n,&alphas,n,&onorms);
7271: VecNormalize(vecs[0],&norm);
7272: if (norm < PETSC_SMALL) {
7273: onorms[0] = 0.0;
7274: VecSet(vecs[0],0.0);
7275: } else {
7276: onorms[0] = norm;
7277: }
7279: for (i=1;i<n;i++) {
7280: VecMDot(vecs[i],i,vecs,alphas);
7281: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7282: VecMAXPY(vecs[i],i,alphas,vecs);
7283: VecNormalize(vecs[i],&norm);
7284: if (norm < PETSC_SMALL) {
7285: onorms[i] = 0.0;
7286: VecSet(vecs[i],0.0);
7287: } else {
7288: onorms[i] = norm;
7289: }
7290: }
7291: /* push nonzero vectors at the beginning */
7292: for (i=0;i<n;i++) {
7293: if (onorms[i] == 0.0) {
7294: for (j=i+1;j<n;j++) {
7295: if (onorms[j] != 0.0) {
7296: VecCopy(vecs[j],vecs[i]);
7297: onorms[j] = 0.0;
7298: }
7299: }
7300: }
7301: }
7302: for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7303: PetscFree2(alphas,onorms);
7304: return(0);
7305: }
7307: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7308: {
7309: Mat A;
7310: PetscInt n_neighs,*neighs,*n_shared,**shared;
7311: PetscMPIInt size,rank,color;
7312: PetscInt *xadj,*adjncy;
7313: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7314: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7315: PetscInt void_procs,*procs_candidates = NULL;
7316: PetscInt xadj_count,*count;
7317: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7318: PetscSubcomm psubcomm;
7319: MPI_Comm subcomm;
7324: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7325: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7328: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7330: if (have_void) *have_void = PETSC_FALSE;
7331: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7332: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7333: MatISGetLocalMat(mat,&A);
7334: MatGetLocalSize(A,&n,NULL);
7335: im_active = !!n;
7336: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7337: void_procs = size - active_procs;
7338: /* get ranks of of non-active processes in mat communicator */
7339: if (void_procs) {
7340: PetscInt ncand;
7342: if (have_void) *have_void = PETSC_TRUE;
7343: PetscMalloc1(size,&procs_candidates);
7344: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7345: for (i=0,ncand=0;i<size;i++) {
7346: if (!procs_candidates[i]) {
7347: procs_candidates[ncand++] = i;
7348: }
7349: }
7350: /* force n_subdomains to be not greater that the number of non-active processes */
7351: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7352: }
7354: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7355: number of subdomains requested 1 -> send to master or first candidate in voids */
7356: MatGetSize(mat,&N,NULL);
7357: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7358: PetscInt issize,isidx,dest;
7359: if (*n_subdomains == 1) dest = 0;
7360: else dest = rank;
7361: if (im_active) {
7362: issize = 1;
7363: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7364: isidx = procs_candidates[dest];
7365: } else {
7366: isidx = dest;
7367: }
7368: } else {
7369: issize = 0;
7370: isidx = -1;
7371: }
7372: if (*n_subdomains != 1) *n_subdomains = active_procs;
7373: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7374: PetscFree(procs_candidates);
7375: return(0);
7376: }
7377: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7378: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7379: threshold = PetscMax(threshold,2);
7381: /* Get info on mapping */
7382: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7384: /* build local CSR graph of subdomains' connectivity */
7385: PetscMalloc1(2,&xadj);
7386: xadj[0] = 0;
7387: xadj[1] = PetscMax(n_neighs-1,0);
7388: PetscMalloc1(xadj[1],&adjncy);
7389: PetscMalloc1(xadj[1],&adjncy_wgt);
7390: PetscCalloc1(n,&count);
7391: for (i=1;i<n_neighs;i++)
7392: for (j=0;j<n_shared[i];j++)
7393: count[shared[i][j]] += 1;
7395: xadj_count = 0;
7396: for (i=1;i<n_neighs;i++) {
7397: for (j=0;j<n_shared[i];j++) {
7398: if (count[shared[i][j]] < threshold) {
7399: adjncy[xadj_count] = neighs[i];
7400: adjncy_wgt[xadj_count] = n_shared[i];
7401: xadj_count++;
7402: break;
7403: }
7404: }
7405: }
7406: xadj[1] = xadj_count;
7407: PetscFree(count);
7408: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7409: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7411: PetscMalloc1(1,&ranks_send_to_idx);
7413: /* Restrict work on active processes only */
7414: PetscMPIIntCast(im_active,&color);
7415: if (void_procs) {
7416: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7417: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7418: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7419: subcomm = PetscSubcommChild(psubcomm);
7420: } else {
7421: psubcomm = NULL;
7422: subcomm = PetscObjectComm((PetscObject)mat);
7423: }
7425: v_wgt = NULL;
7426: if (!color) {
7427: PetscFree(xadj);
7428: PetscFree(adjncy);
7429: PetscFree(adjncy_wgt);
7430: } else {
7431: Mat subdomain_adj;
7432: IS new_ranks,new_ranks_contig;
7433: MatPartitioning partitioner;
7434: PetscInt rstart=0,rend=0;
7435: PetscInt *is_indices,*oldranks;
7436: PetscMPIInt size;
7437: PetscBool aggregate;
7439: MPI_Comm_size(subcomm,&size);
7440: if (void_procs) {
7441: PetscInt prank = rank;
7442: PetscMalloc1(size,&oldranks);
7443: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7444: for (i=0;i<xadj[1];i++) {
7445: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7446: }
7447: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7448: } else {
7449: oldranks = NULL;
7450: }
7451: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7452: if (aggregate) { /* TODO: all this part could be made more efficient */
7453: PetscInt lrows,row,ncols,*cols;
7454: PetscMPIInt nrank;
7455: PetscScalar *vals;
7457: MPI_Comm_rank(subcomm,&nrank);
7458: lrows = 0;
7459: if (nrank<redprocs) {
7460: lrows = size/redprocs;
7461: if (nrank<size%redprocs) lrows++;
7462: }
7463: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7464: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7465: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7466: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7467: row = nrank;
7468: ncols = xadj[1]-xadj[0];
7469: cols = adjncy;
7470: PetscMalloc1(ncols,&vals);
7471: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7472: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7473: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7474: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7475: PetscFree(xadj);
7476: PetscFree(adjncy);
7477: PetscFree(adjncy_wgt);
7478: PetscFree(vals);
7479: if (use_vwgt) {
7480: Vec v;
7481: const PetscScalar *array;
7482: PetscInt nl;
7484: MatCreateVecs(subdomain_adj,&v,NULL);
7485: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7486: VecAssemblyBegin(v);
7487: VecAssemblyEnd(v);
7488: VecGetLocalSize(v,&nl);
7489: VecGetArrayRead(v,&array);
7490: PetscMalloc1(nl,&v_wgt);
7491: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7492: VecRestoreArrayRead(v,&array);
7493: VecDestroy(&v);
7494: }
7495: } else {
7496: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7497: if (use_vwgt) {
7498: PetscMalloc1(1,&v_wgt);
7499: v_wgt[0] = n;
7500: }
7501: }
7502: /* MatView(subdomain_adj,0); */
7504: /* Partition */
7505: MatPartitioningCreate(subcomm,&partitioner);
7506: #if defined(PETSC_HAVE_PTSCOTCH)
7507: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7508: #elif defined(PETSC_HAVE_PARMETIS)
7509: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7510: #else
7511: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7512: #endif
7513: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7514: if (v_wgt) {
7515: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7516: }
7517: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7518: MatPartitioningSetNParts(partitioner,*n_subdomains);
7519: MatPartitioningSetFromOptions(partitioner);
7520: MatPartitioningApply(partitioner,&new_ranks);
7521: /* MatPartitioningView(partitioner,0); */
7523: /* renumber new_ranks to avoid "holes" in new set of processors */
7524: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7525: ISDestroy(&new_ranks);
7526: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7527: if (!aggregate) {
7528: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7529: #if defined(PETSC_USE_DEBUG)
7530: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7531: #endif
7532: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7533: } else if (oldranks) {
7534: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7535: } else {
7536: ranks_send_to_idx[0] = is_indices[0];
7537: }
7538: } else {
7539: PetscInt idx = 0;
7540: PetscMPIInt tag;
7541: MPI_Request *reqs;
7543: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7544: PetscMalloc1(rend-rstart,&reqs);
7545: for (i=rstart;i<rend;i++) {
7546: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7547: }
7548: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7549: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7550: PetscFree(reqs);
7551: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7552: #if defined(PETSC_USE_DEBUG)
7553: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7554: #endif
7555: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7556: } else if (oldranks) {
7557: ranks_send_to_idx[0] = oldranks[idx];
7558: } else {
7559: ranks_send_to_idx[0] = idx;
7560: }
7561: }
7562: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7563: /* clean up */
7564: PetscFree(oldranks);
7565: ISDestroy(&new_ranks_contig);
7566: MatDestroy(&subdomain_adj);
7567: MatPartitioningDestroy(&partitioner);
7568: }
7569: PetscSubcommDestroy(&psubcomm);
7570: PetscFree(procs_candidates);
7572: /* assemble parallel IS for sends */
7573: i = 1;
7574: if (!color) i=0;
7575: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7576: return(0);
7577: }
/*
  Tags describing the storage format of a local matrix.
  PCBDDCMatISSubassemble writes one of these (cast to PetscInt) as the first
  entry of each index message, and the receiver uses it to pick the type of
  the new local matrix.
*/
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE,
  MATBAIJ_PRIVATE,
  MATSBAIJ_PRIVATE
} MatTypePrivate;
7581: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7582: {
7583: Mat local_mat;
7584: IS is_sends_internal;
7585: PetscInt rows,cols,new_local_rows;
7586: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7587: PetscBool ismatis,isdense,newisdense,destroy_mat;
7588: ISLocalToGlobalMapping l2gmap;
7589: PetscInt* l2gmap_indices;
7590: const PetscInt* is_indices;
7591: MatType new_local_type;
7592: /* buffers */
7593: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7594: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7595: PetscInt *recv_buffer_idxs_local;
7596: PetscScalar *ptr_vals,*recv_buffer_vals;
7597: const PetscScalar *send_buffer_vals;
7598: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7599: /* MPI */
7600: MPI_Comm comm,comm_n;
7601: PetscSubcomm subcomm;
7602: PetscMPIInt n_sends,n_recvs,size;
7603: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7604: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7605: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7606: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7607: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7608: PetscErrorCode ierr;
7612: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7613: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7620: if (nvecs) {
7621: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7623: }
7624: /* further checks */
7625: MatISGetLocalMat(mat,&local_mat);
7626: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7627: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7628: MatGetSize(local_mat,&rows,&cols);
7629: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7630: if (reuse && *mat_n) {
7631: PetscInt mrows,mcols,mnrows,mncols;
7633: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7634: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7635: MatGetSize(mat,&mrows,&mcols);
7636: MatGetSize(*mat_n,&mnrows,&mncols);
7637: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7638: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7639: }
7640: MatGetBlockSize(local_mat,&bs);
7643: /* prepare IS for sending if not provided */
7644: if (!is_sends) {
7645: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7646: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7647: } else {
7648: PetscObjectReference((PetscObject)is_sends);
7649: is_sends_internal = is_sends;
7650: }
7652: /* get comm */
7653: PetscObjectGetComm((PetscObject)mat,&comm);
7655: /* compute number of sends */
7656: ISGetLocalSize(is_sends_internal,&i);
7657: PetscMPIIntCast(i,&n_sends);
7659: /* compute number of receives */
7660: MPI_Comm_size(comm,&size);
7661: PetscMalloc1(size,&iflags);
7662: PetscArrayzero(iflags,size);
7663: ISGetIndices(is_sends_internal,&is_indices);
7664: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7665: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7666: PetscFree(iflags);
7668: /* restrict comm if requested */
7669: subcomm = 0;
7670: destroy_mat = PETSC_FALSE;
7671: if (restrict_comm) {
7672: PetscMPIInt color,subcommsize;
7674: color = 0;
7675: if (restrict_full) {
7676: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7677: } else {
7678: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7679: }
7680: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7681: subcommsize = size - subcommsize;
7682: /* check if reuse has been requested */
7683: if (reuse) {
7684: if (*mat_n) {
7685: PetscMPIInt subcommsize2;
7686: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7687: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7688: comm_n = PetscObjectComm((PetscObject)*mat_n);
7689: } else {
7690: comm_n = PETSC_COMM_SELF;
7691: }
7692: } else { /* MAT_INITIAL_MATRIX */
7693: PetscMPIInt rank;
7695: MPI_Comm_rank(comm,&rank);
7696: PetscSubcommCreate(comm,&subcomm);
7697: PetscSubcommSetNumber(subcomm,2);
7698: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7699: comm_n = PetscSubcommChild(subcomm);
7700: }
7701: /* flag to destroy *mat_n if not significative */
7702: if (color) destroy_mat = PETSC_TRUE;
7703: } else {
7704: comm_n = comm;
7705: }
7707: /* prepare send/receive buffers */
7708: PetscMalloc1(size,&ilengths_idxs);
7709: PetscArrayzero(ilengths_idxs,size);
7710: PetscMalloc1(size,&ilengths_vals);
7711: PetscArrayzero(ilengths_vals,size);
7712: if (nis) {
7713: PetscCalloc1(size,&ilengths_idxs_is);
7714: }
7716: /* Get data from local matrices */
7717: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7718: /* TODO: See below some guidelines on how to prepare the local buffers */
7719: /*
7720: send_buffer_vals should contain the raw values of the local matrix
7721: send_buffer_idxs should contain:
7722: - MatType_PRIVATE type
7723: - PetscInt size_of_l2gmap
7724: - PetscInt global_row_indices[size_of_l2gmap]
7725: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7726: */
7727: else {
7728: MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7729: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7730: PetscMalloc1(i+2,&send_buffer_idxs);
7731: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7732: send_buffer_idxs[1] = i;
7733: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7734: PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7735: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7736: PetscMPIIntCast(i,&len);
7737: for (i=0;i<n_sends;i++) {
7738: ilengths_vals[is_indices[i]] = len*len;
7739: ilengths_idxs[is_indices[i]] = len+2;
7740: }
7741: }
7742: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7743: /* additional is (if any) */
7744: if (nis) {
7745: PetscMPIInt psum;
7746: PetscInt j;
7747: for (j=0,psum=0;j<nis;j++) {
7748: PetscInt plen;
7749: ISGetLocalSize(isarray[j],&plen);
7750: PetscMPIIntCast(plen,&len);
7751: psum += len+1; /* indices + lenght */
7752: }
7753: PetscMalloc1(psum,&send_buffer_idxs_is);
7754: for (j=0,psum=0;j<nis;j++) {
7755: PetscInt plen;
7756: const PetscInt *is_array_idxs;
7757: ISGetLocalSize(isarray[j],&plen);
7758: send_buffer_idxs_is[psum] = plen;
7759: ISGetIndices(isarray[j],&is_array_idxs);
7760: PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7761: ISRestoreIndices(isarray[j],&is_array_idxs);
7762: psum += plen+1; /* indices + lenght */
7763: }
7764: for (i=0;i<n_sends;i++) {
7765: ilengths_idxs_is[is_indices[i]] = psum;
7766: }
7767: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7768: }
7769: MatISRestoreLocalMat(mat,&local_mat);
7771: buf_size_idxs = 0;
7772: buf_size_vals = 0;
7773: buf_size_idxs_is = 0;
7774: buf_size_vecs = 0;
7775: for (i=0;i<n_recvs;i++) {
7776: buf_size_idxs += (PetscInt)olengths_idxs[i];
7777: buf_size_vals += (PetscInt)olengths_vals[i];
7778: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7779: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7780: }
7781: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7782: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7783: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7784: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7786: /* get new tags for clean communications */
7787: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7788: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7789: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7790: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7792: /* allocate for requests */
7793: PetscMalloc1(n_sends,&send_req_idxs);
7794: PetscMalloc1(n_sends,&send_req_vals);
7795: PetscMalloc1(n_sends,&send_req_idxs_is);
7796: PetscMalloc1(n_sends,&send_req_vecs);
7797: PetscMalloc1(n_recvs,&recv_req_idxs);
7798: PetscMalloc1(n_recvs,&recv_req_vals);
7799: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7800: PetscMalloc1(n_recvs,&recv_req_vecs);
7802: /* communications */
7803: ptr_idxs = recv_buffer_idxs;
7804: ptr_vals = recv_buffer_vals;
7805: ptr_idxs_is = recv_buffer_idxs_is;
7806: ptr_vecs = recv_buffer_vecs;
7807: for (i=0;i<n_recvs;i++) {
7808: source_dest = onodes[i];
7809: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7810: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7811: ptr_idxs += olengths_idxs[i];
7812: ptr_vals += olengths_vals[i];
7813: if (nis) {
7814: source_dest = onodes_is[i];
7815: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7816: ptr_idxs_is += olengths_idxs_is[i];
7817: }
7818: if (nvecs) {
7819: source_dest = onodes[i];
7820: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7821: ptr_vecs += olengths_idxs[i]-2;
7822: }
7823: }
7824: for (i=0;i<n_sends;i++) {
7825: PetscMPIIntCast(is_indices[i],&source_dest);
7826: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7827: MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7828: if (nis) {
7829: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7830: }
7831: if (nvecs) {
7832: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7833: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7834: }
7835: }
7836: ISRestoreIndices(is_sends_internal,&is_indices);
7837: ISDestroy(&is_sends_internal);
7839: /* assemble new l2g map */
7840: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7841: ptr_idxs = recv_buffer_idxs;
7842: new_local_rows = 0;
7843: for (i=0;i<n_recvs;i++) {
7844: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7845: ptr_idxs += olengths_idxs[i];
7846: }
7847: PetscMalloc1(new_local_rows,&l2gmap_indices);
7848: ptr_idxs = recv_buffer_idxs;
7849: new_local_rows = 0;
7850: for (i=0;i<n_recvs;i++) {
7851: PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7852: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7853: ptr_idxs += olengths_idxs[i];
7854: }
7855: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7856: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7857: PetscFree(l2gmap_indices);
7859: /* infer new local matrix type from received local matrices type */
7860: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7861: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7862: if (n_recvs) {
7863: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7864: ptr_idxs = recv_buffer_idxs;
7865: for (i=0;i<n_recvs;i++) {
7866: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7867: new_local_type_private = MATAIJ_PRIVATE;
7868: break;
7869: }
7870: ptr_idxs += olengths_idxs[i];
7871: }
7872: switch (new_local_type_private) {
7873: case MATDENSE_PRIVATE:
7874: new_local_type = MATSEQAIJ;
7875: bs = 1;
7876: break;
7877: case MATAIJ_PRIVATE:
7878: new_local_type = MATSEQAIJ;
7879: bs = 1;
7880: break;
7881: case MATBAIJ_PRIVATE:
7882: new_local_type = MATSEQBAIJ;
7883: break;
7884: case MATSBAIJ_PRIVATE:
7885: new_local_type = MATSEQSBAIJ;
7886: break;
7887: default:
7888: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7889: break;
7890: }
7891: } else { /* by default, new_local_type is seqaij */
7892: new_local_type = MATSEQAIJ;
7893: bs = 1;
7894: }
7896: /* create MATIS object if needed */
7897: if (!reuse) {
7898: MatGetSize(mat,&rows,&cols);
7899: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7900: } else {
7901: /* it also destroys the local matrices */
7902: if (*mat_n) {
7903: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7904: } else { /* this is a fake object */
7905: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7906: }
7907: }
7908: MatISGetLocalMat(*mat_n,&local_mat);
7909: MatSetType(local_mat,new_local_type);
7911: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7913: /* Global to local map of received indices */
7914: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7915: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7916: ISLocalToGlobalMappingDestroy(&l2gmap);
7918: /* restore attributes -> type of incoming data and its size */
7919: buf_size_idxs = 0;
7920: for (i=0;i<n_recvs;i++) {
7921: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7922: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7923: buf_size_idxs += (PetscInt)olengths_idxs[i];
7924: }
7925: PetscFree(recv_buffer_idxs);
7927: /* set preallocation */
7928: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7929: if (!newisdense) {
7930: PetscInt *new_local_nnz=0;
7932: ptr_idxs = recv_buffer_idxs_local;
7933: if (n_recvs) {
7934: PetscCalloc1(new_local_rows,&new_local_nnz);
7935: }
7936: for (i=0;i<n_recvs;i++) {
7937: PetscInt j;
7938: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7939: for (j=0;j<*(ptr_idxs+1);j++) {
7940: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7941: }
7942: } else {
7943: /* TODO */
7944: }
7945: ptr_idxs += olengths_idxs[i];
7946: }
7947: if (new_local_nnz) {
7948: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7949: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7950: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7951: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7952: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7953: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7954: } else {
7955: MatSetUp(local_mat);
7956: }
7957: PetscFree(new_local_nnz);
7958: } else {
7959: MatSetUp(local_mat);
7960: }
7962: /* set values */
7963: ptr_vals = recv_buffer_vals;
7964: ptr_idxs = recv_buffer_idxs_local;
7965: for (i=0;i<n_recvs;i++) {
7966: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7967: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7968: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7969: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7970: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7971: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7972: } else {
7973: /* TODO */
7974: }
7975: ptr_idxs += olengths_idxs[i];
7976: ptr_vals += olengths_vals[i];
7977: }
7978: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7979: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7980: MatISRestoreLocalMat(*mat_n,&local_mat);
7981: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7982: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7983: PetscFree(recv_buffer_vals);
7985: #if 0
7986: if (!restrict_comm) { /* check */
7987: Vec lvec,rvec;
7988: PetscReal infty_error;
7990: MatCreateVecs(mat,&rvec,&lvec);
7991: VecSetRandom(rvec,NULL);
7992: MatMult(mat,rvec,lvec);
7993: VecScale(lvec,-1.0);
7994: MatMultAdd(*mat_n,rvec,lvec,lvec);
7995: VecNorm(lvec,NORM_INFINITY,&infty_error);
7996: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7997: VecDestroy(&rvec);
7998: VecDestroy(&lvec);
7999: }
8000: #endif
8002: /* assemble new additional is (if any) */
8003: if (nis) {
8004: PetscInt **temp_idxs,*count_is,j,psum;
8006: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
8007: PetscCalloc1(nis,&count_is);
8008: ptr_idxs = recv_buffer_idxs_is;
8009: psum = 0;
8010: for (i=0;i<n_recvs;i++) {
8011: for (j=0;j<nis;j++) {
8012: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8013: count_is[j] += plen; /* increment counting of buffer for j-th IS */
8014: psum += plen;
8015: ptr_idxs += plen+1; /* shift pointer to received data */
8016: }
8017: }
8018: PetscMalloc1(nis,&temp_idxs);
8019: PetscMalloc1(psum,&temp_idxs[0]);
8020: for (i=1;i<nis;i++) {
8021: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
8022: }
8023: PetscArrayzero(count_is,nis);
8024: ptr_idxs = recv_buffer_idxs_is;
8025: for (i=0;i<n_recvs;i++) {
8026: for (j=0;j<nis;j++) {
8027: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8028: PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
8029: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8030: ptr_idxs += plen+1; /* shift pointer to received data */
8031: }
8032: }
8033: for (i=0;i<nis;i++) {
8034: ISDestroy(&isarray[i]);
8035: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
8036: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
8037: }
8038: PetscFree(count_is);
8039: PetscFree(temp_idxs[0]);
8040: PetscFree(temp_idxs);
8041: }
8042: /* free workspace */
8043: PetscFree(recv_buffer_idxs_is);
8044: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
8045: PetscFree(send_buffer_idxs);
8046: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
8047: if (isdense) {
8048: MatISGetLocalMat(mat,&local_mat);
8049: MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
8050: MatISRestoreLocalMat(mat,&local_mat);
8051: } else {
8052: /* PetscFree(send_buffer_vals); */
8053: }
8054: if (nis) {
8055: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
8056: PetscFree(send_buffer_idxs_is);
8057: }
8059: if (nvecs) {
8060: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8061: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8062: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8063: VecDestroy(&nnsp_vec[0]);
8064: VecCreate(comm_n,&nnsp_vec[0]);
8065: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8066: VecSetType(nnsp_vec[0],VECSTANDARD);
8067: /* set values */
8068: ptr_vals = recv_buffer_vecs;
8069: ptr_idxs = recv_buffer_idxs_local;
8070: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8071: for (i=0;i<n_recvs;i++) {
8072: PetscInt j;
8073: for (j=0;j<*(ptr_idxs+1);j++) {
8074: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8075: }
8076: ptr_idxs += olengths_idxs[i];
8077: ptr_vals += olengths_idxs[i]-2;
8078: }
8079: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8080: VecAssemblyBegin(nnsp_vec[0]);
8081: VecAssemblyEnd(nnsp_vec[0]);
8082: }
8084: PetscFree(recv_buffer_vecs);
8085: PetscFree(recv_buffer_idxs_local);
8086: PetscFree(recv_req_idxs);
8087: PetscFree(recv_req_vals);
8088: PetscFree(recv_req_vecs);
8089: PetscFree(recv_req_idxs_is);
8090: PetscFree(send_req_idxs);
8091: PetscFree(send_req_vals);
8092: PetscFree(send_req_vecs);
8093: PetscFree(send_req_idxs_is);
8094: PetscFree(ilengths_vals);
8095: PetscFree(ilengths_idxs);
8096: PetscFree(olengths_vals);
8097: PetscFree(olengths_idxs);
8098: PetscFree(onodes);
8099: if (nis) {
8100: PetscFree(ilengths_idxs_is);
8101: PetscFree(olengths_idxs_is);
8102: PetscFree(onodes_is);
8103: }
8104: PetscSubcommDestroy(&subcomm);
8105: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
8106: MatDestroy(mat_n);
8107: for (i=0;i<nis;i++) {
8108: ISDestroy(&isarray[i]);
8109: }
8110: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8111: VecDestroy(&nnsp_vec[0]);
8112: }
8113: *mat_n = NULL;
8114: }
8115: return(0);
8116: }
8118: /* temporary hack into ksp private data structure */
8119: #include <petsc/private/kspimpl.h>
8121: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8122: {
8123: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8124: PC_IS *pcis = (PC_IS*)pc->data;
8125: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8126: Mat coarsedivudotp = NULL;
8127: Mat coarseG,t_coarse_mat_is;
8128: MatNullSpace CoarseNullSpace = NULL;
8129: ISLocalToGlobalMapping coarse_islg;
8130: IS coarse_is,*isarray,corners;
8131: PetscInt i,im_active=-1,active_procs=-1;
8132: PetscInt nis,nisdofs,nisneu,nisvert;
8133: PetscInt coarse_eqs_per_proc;
8134: PC pc_temp;
8135: PCType coarse_pc_type;
8136: KSPType coarse_ksp_type;
8137: PetscBool multilevel_requested,multilevel_allowed;
8138: PetscBool coarse_reuse;
8139: PetscInt ncoarse,nedcfield;
8140: PetscBool compute_vecs = PETSC_FALSE;
8141: PetscScalar *array;
8142: MatReuse coarse_mat_reuse;
8143: PetscBool restr, full_restr, have_void;
8144: PetscMPIInt size;
8145: PetscErrorCode ierr;
8148: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8149: /* Assign global numbering to coarse dofs */
8150: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8151: PetscInt ocoarse_size;
8152: compute_vecs = PETSC_TRUE;
8154: pcbddc->new_primal_space = PETSC_TRUE;
8155: ocoarse_size = pcbddc->coarse_size;
8156: PetscFree(pcbddc->global_primal_indices);
8157: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8158: /* see if we can avoid some work */
8159: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8160: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8161: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8162: KSPReset(pcbddc->coarse_ksp);
8163: coarse_reuse = PETSC_FALSE;
8164: } else { /* we can safely reuse already computed coarse matrix */
8165: coarse_reuse = PETSC_TRUE;
8166: }
8167: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8168: coarse_reuse = PETSC_FALSE;
8169: }
8170: /* reset any subassembling information */
8171: if (!coarse_reuse || pcbddc->recompute_topography) {
8172: ISDestroy(&pcbddc->coarse_subassembling);
8173: }
8174: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8175: coarse_reuse = PETSC_TRUE;
8176: }
8177: if (coarse_reuse && pcbddc->coarse_ksp) {
8178: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8179: PetscObjectReference((PetscObject)coarse_mat);
8180: coarse_mat_reuse = MAT_REUSE_MATRIX;
8181: } else {
8182: coarse_mat = NULL;
8183: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8184: }
8186: /* creates temporary l2gmap and IS for coarse indexes */
8187: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8188: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8190: /* creates temporary MATIS object for coarse matrix */
8191: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8192: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8193: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8194: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8195: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8196: MatDestroy(&coarse_submat_dense);
8198: /* count "active" (i.e. with positive local size) and "void" processes */
8199: im_active = !!(pcis->n);
8200: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8202: /* determine number of processes participating in coarse solver and compute subassembling pattern */
8203: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8204: /* full_restr : just use the receivers from the subassembling pattern */
8205: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8206: coarse_mat_is = NULL;
8207: multilevel_allowed = PETSC_FALSE;
8208: multilevel_requested = PETSC_FALSE;
8209: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8210: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8211: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8212: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8213: if (multilevel_requested) {
8214: ncoarse = active_procs/pcbddc->coarsening_ratio;
8215: restr = PETSC_FALSE;
8216: full_restr = PETSC_FALSE;
8217: } else {
8218: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8219: restr = PETSC_TRUE;
8220: full_restr = PETSC_TRUE;
8221: }
8222: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8223: ncoarse = PetscMax(1,ncoarse);
8224: if (!pcbddc->coarse_subassembling) {
8225: if (pcbddc->coarsening_ratio > 1) {
8226: if (multilevel_requested) {
8227: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8228: } else {
8229: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8230: }
8231: } else {
8232: PetscMPIInt rank;
8234: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8235: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8236: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8237: }
8238: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8239: PetscInt psum;
8240: if (pcbddc->coarse_ksp) psum = 1;
8241: else psum = 0;
8242: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8243: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8244: }
8245: /* determine if we can go multilevel */
8246: if (multilevel_requested) {
8247: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8248: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8249: }
8250: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8252: /* dump subassembling pattern */
8253: if (pcbddc->dbg_flag && multilevel_allowed) {
8254: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8255: }
8256: /* compute dofs splitting and neumann boundaries for coarse dofs */
8257: nedcfield = -1;
8258: corners = NULL;
8259: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8260: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8261: const PetscInt *idxs;
8262: ISLocalToGlobalMapping tmap;
8264: /* create map between primal indices (in local representative ordering) and local primal numbering */
8265: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8266: /* allocate space for temporary storage */
8267: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8268: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8269: /* allocate for IS array */
8270: nisdofs = pcbddc->n_ISForDofsLocal;
8271: if (pcbddc->nedclocal) {
8272: if (pcbddc->nedfield > -1) {
8273: nedcfield = pcbddc->nedfield;
8274: } else {
8275: nedcfield = 0;
8276: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8277: nisdofs = 1;
8278: }
8279: }
8280: nisneu = !!pcbddc->NeumannBoundariesLocal;
8281: nisvert = 0; /* nisvert is not used */
8282: nis = nisdofs + nisneu + nisvert;
8283: PetscMalloc1(nis,&isarray);
8284: /* dofs splitting */
8285: for (i=0;i<nisdofs;i++) {
8286: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8287: if (nedcfield != i) {
8288: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8289: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8290: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8291: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8292: } else {
8293: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8294: ISGetIndices(pcbddc->nedclocal,&idxs);
8295: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8296: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8297: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8298: }
8299: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8300: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8301: /* ISView(isarray[i],0); */
8302: }
8303: /* neumann boundaries */
8304: if (pcbddc->NeumannBoundariesLocal) {
8305: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8306: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8307: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8308: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8309: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8310: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8311: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8312: /* ISView(isarray[nisdofs],0); */
8313: }
8314: /* coordinates */
8315: if (pcbddc->corner_selected) {
8316: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8317: ISGetLocalSize(corners,&tsize);
8318: ISGetIndices(corners,&idxs);
8319: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8320: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8321: ISRestoreIndices(corners,&idxs);
8322: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8323: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8324: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8325: }
8326: PetscFree(tidxs);
8327: PetscFree(tidxs2);
8328: ISLocalToGlobalMappingDestroy(&tmap);
8329: } else {
8330: nis = 0;
8331: nisdofs = 0;
8332: nisneu = 0;
8333: nisvert = 0;
8334: isarray = NULL;
8335: }
8336: /* destroy no longer needed map */
8337: ISLocalToGlobalMappingDestroy(&coarse_islg);
8339: /* subassemble */
8340: if (multilevel_allowed) {
8341: Vec vp[1];
8342: PetscInt nvecs = 0;
8343: PetscBool reuse,reuser;
8345: if (coarse_mat) reuse = PETSC_TRUE;
8346: else reuse = PETSC_FALSE;
8347: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8348: vp[0] = NULL;
8349: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8350: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8351: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8352: VecSetType(vp[0],VECSTANDARD);
8353: nvecs = 1;
8355: if (pcbddc->divudotp) {
8356: Mat B,loc_divudotp;
8357: Vec v,p;
8358: IS dummy;
8359: PetscInt np;
8361: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8362: MatGetSize(loc_divudotp,&np,NULL);
8363: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8364: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8365: MatCreateVecs(B,&v,&p);
8366: VecSet(p,1.);
8367: MatMultTranspose(B,p,v);
8368: VecDestroy(&p);
8369: MatDestroy(&B);
8370: VecGetArray(vp[0],&array);
8371: VecPlaceArray(pcbddc->vec1_P,array);
8372: VecRestoreArray(vp[0],&array);
8373: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8374: VecResetArray(pcbddc->vec1_P);
8375: ISDestroy(&dummy);
8376: VecDestroy(&v);
8377: }
8378: }
8379: if (reuser) {
8380: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8381: } else {
8382: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8383: }
8384: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8385: PetscScalar *arraym;
8386: const PetscScalar *arrayv;
8387: PetscInt nl;
8388: VecGetLocalSize(vp[0],&nl);
8389: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8390: MatDenseGetArray(coarsedivudotp,&arraym);
8391: VecGetArrayRead(vp[0],&arrayv);
8392: PetscArraycpy(arraym,arrayv,nl);
8393: VecRestoreArrayRead(vp[0],&arrayv);
8394: MatDenseRestoreArray(coarsedivudotp,&arraym);
8395: VecDestroy(&vp[0]);
8396: } else {
8397: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8398: }
8399: } else {
8400: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8401: }
8402: if (coarse_mat_is || coarse_mat) {
8403: if (!multilevel_allowed) {
8404: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8405: } else {
8406: /* if this matrix is present, it means we are not reusing the coarse matrix */
8407: if (coarse_mat_is) {
8408: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8409: PetscObjectReference((PetscObject)coarse_mat_is);
8410: coarse_mat = coarse_mat_is;
8411: }
8412: }
8413: }
8414: MatDestroy(&t_coarse_mat_is);
8415: MatDestroy(&coarse_mat_is);
8417: /* create local to global scatters for coarse problem */
8418: if (compute_vecs) {
8419: PetscInt lrows;
8420: VecDestroy(&pcbddc->coarse_vec);
8421: if (coarse_mat) {
8422: MatGetLocalSize(coarse_mat,&lrows,NULL);
8423: } else {
8424: lrows = 0;
8425: }
8426: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8427: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8428: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8429: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8430: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8431: }
8432: ISDestroy(&coarse_is);
8434: /* set defaults for coarse KSP and PC */
8435: if (multilevel_allowed) {
8436: coarse_ksp_type = KSPRICHARDSON;
8437: coarse_pc_type = PCBDDC;
8438: } else {
8439: coarse_ksp_type = KSPPREONLY;
8440: coarse_pc_type = PCREDUNDANT;
8441: }
8443: /* print some info if requested */
8444: if (pcbddc->dbg_flag) {
8445: if (!multilevel_allowed) {
8446: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8447: if (multilevel_requested) {
8448: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8449: } else if (pcbddc->max_levels) {
8450: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8451: }
8452: PetscViewerFlush(pcbddc->dbg_viewer);
8453: }
8454: }
8456: /* communicate coarse discrete gradient */
8457: coarseG = NULL;
8458: if (pcbddc->nedcG && multilevel_allowed) {
8459: MPI_Comm ccomm;
8460: if (coarse_mat) {
8461: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8462: } else {
8463: ccomm = MPI_COMM_NULL;
8464: }
8465: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8466: }
8468: /* create the coarse KSP object only once with defaults */
8469: if (coarse_mat) {
8470: PetscBool isredundant,isbddc,force,valid;
8471: PetscViewer dbg_viewer = NULL;
8473: if (pcbddc->dbg_flag) {
8474: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8475: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8476: }
8477: if (!pcbddc->coarse_ksp) {
8478: char prefix[256],str_level[16];
8479: size_t len;
8481: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8482: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8483: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8484: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8485: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8486: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8487: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8488: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8489: /* TODO is this logic correct? should check for coarse_mat type */
8490: PCSetType(pc_temp,coarse_pc_type);
8491: /* prefix */
8492: PetscStrcpy(prefix,"");
8493: PetscStrcpy(str_level,"");
8494: if (!pcbddc->current_level) {
8495: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8496: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8497: } else {
8498: PetscStrlen(((PetscObject)pc)->prefix,&len);
8499: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8500: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8501: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8502: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8503: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8504: PetscStrlcat(prefix,str_level,sizeof(prefix));
8505: }
8506: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8507: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8508: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8509: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8510: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8511: /* allow user customization */
8512: KSPSetFromOptions(pcbddc->coarse_ksp);
8513: /* get some info after set from options */
8514: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8515: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8516: force = PETSC_FALSE;
8517: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8518: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8519: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8520: if (multilevel_allowed && !force && !valid) {
8521: isbddc = PETSC_TRUE;
8522: PCSetType(pc_temp,PCBDDC);
8523: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8524: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8525: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8526: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8527: PetscObjectOptionsBegin((PetscObject)pc_temp);
8528: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8529: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8530: PetscOptionsEnd();
8531: pc_temp->setfromoptionscalled++;
8532: }
8533: }
8534: }
8535: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8536: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8537: if (nisdofs) {
8538: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8539: for (i=0;i<nisdofs;i++) {
8540: ISDestroy(&isarray[i]);
8541: }
8542: }
8543: if (nisneu) {
8544: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8545: ISDestroy(&isarray[nisdofs]);
8546: }
8547: if (nisvert) {
8548: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8549: ISDestroy(&isarray[nis-1]);
8550: }
8551: if (coarseG) {
8552: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8553: }
8555: /* get some info after set from options */
8556: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8558: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8559: if (isbddc && !multilevel_allowed) {
8560: PCSetType(pc_temp,coarse_pc_type);
8561: }
8562: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8563: force = PETSC_FALSE;
8564: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8565: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8566: if (multilevel_requested && multilevel_allowed && !valid && !force) {
8567: PCSetType(pc_temp,PCBDDC);
8568: }
8569: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8570: if (isredundant) {
8571: KSP inner_ksp;
8572: PC inner_pc;
8574: PCRedundantGetKSP(pc_temp,&inner_ksp);
8575: KSPGetPC(inner_ksp,&inner_pc);
8576: }
8578: /* parameters which miss an API */
8579: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8580: if (isbddc) {
8581: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8583: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8584: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8585: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8586: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8587: if (pcbddc_coarse->benign_saddle_point) {
8588: Mat coarsedivudotp_is;
8589: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8590: IS row,col;
8591: const PetscInt *gidxs;
8592: PetscInt n,st,M,N;
8594: MatGetSize(coarsedivudotp,&n,NULL);
8595: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8596: st = st-n;
8597: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8598: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8599: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8600: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8601: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8602: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8603: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8604: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8605: ISGetSize(row,&M);
8606: MatGetSize(coarse_mat,&N,NULL);
8607: ISDestroy(&row);
8608: ISDestroy(&col);
8609: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8610: MatSetType(coarsedivudotp_is,MATIS);
8611: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8612: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8613: ISLocalToGlobalMappingDestroy(&rl2g);
8614: ISLocalToGlobalMappingDestroy(&cl2g);
8615: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8616: MatDestroy(&coarsedivudotp);
8617: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8618: MatDestroy(&coarsedivudotp_is);
8619: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8620: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8621: }
8622: }
8624: /* propagate symmetry info of coarse matrix */
8625: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8626: if (pc->pmat->symmetric_set) {
8627: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8628: }
8629: if (pc->pmat->hermitian_set) {
8630: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8631: }
8632: if (pc->pmat->spd_set) {
8633: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8634: }
8635: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8636: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8637: }
8638: /* set operators */
8639: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8640: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8641: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8642: if (pcbddc->dbg_flag) {
8643: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8644: }
8645: }
8646: MatDestroy(&coarseG);
8647: PetscFree(isarray);
8648: #if 0
8649: {
8650: PetscViewer viewer;
8651: char filename[256];
8652: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8653: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8654: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8655: MatView(coarse_mat,viewer);
8656: PetscViewerPopFormat(viewer);
8657: PetscViewerDestroy(&viewer);
8658: }
8659: #endif
8661: if (corners) {
8662: Vec gv;
8663: IS is;
8664: const PetscInt *idxs;
8665: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8666: PetscScalar *coords;
8668: if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8669: VecGetSize(pcbddc->coarse_vec,&N);
8670: VecGetLocalSize(pcbddc->coarse_vec,&n);
8671: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8672: VecSetBlockSize(gv,cdim);
8673: VecSetSizes(gv,n*cdim,N*cdim);
8674: VecSetType(gv,VECSTANDARD);
8675: VecSetFromOptions(gv);
8676: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8678: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8679: ISGetLocalSize(is,&n);
8680: ISGetIndices(is,&idxs);
8681: PetscMalloc1(n*cdim,&coords);
8682: for (i=0;i<n;i++) {
8683: for (d=0;d<cdim;d++) {
8684: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8685: }
8686: }
8687: ISRestoreIndices(is,&idxs);
8688: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8690: ISGetLocalSize(corners,&n);
8691: ISGetIndices(corners,&idxs);
8692: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8693: ISRestoreIndices(corners,&idxs);
8694: PetscFree(coords);
8695: VecAssemblyBegin(gv);
8696: VecAssemblyEnd(gv);
8697: VecGetArray(gv,&coords);
8698: if (pcbddc->coarse_ksp) {
8699: PC coarse_pc;
8700: PetscBool isbddc;
8702: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8703: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8704: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8705: PetscReal *realcoords;
8707: VecGetLocalSize(gv,&n);
8708: #if defined(PETSC_USE_COMPLEX)
8709: PetscMalloc1(n,&realcoords);
8710: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8711: #else
8712: realcoords = coords;
8713: #endif
8714: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8715: #if defined(PETSC_USE_COMPLEX)
8716: PetscFree(realcoords);
8717: #endif
8718: }
8719: }
8720: VecRestoreArray(gv,&coords);
8721: VecDestroy(&gv);
8722: }
8723: ISDestroy(&corners);
8725: if (pcbddc->coarse_ksp) {
8726: Vec crhs,csol;
8728: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8729: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8730: if (!csol) {
8731: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8732: }
8733: if (!crhs) {
8734: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8735: }
8736: }
8737: MatDestroy(&coarsedivudotp);
8739: /* compute null space for coarse solver if the benign trick has been requested */
8740: if (pcbddc->benign_null) {
8742: VecSet(pcbddc->vec1_P,0.);
8743: for (i=0;i<pcbddc->benign_n;i++) {
8744: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8745: }
8746: VecAssemblyBegin(pcbddc->vec1_P);
8747: VecAssemblyEnd(pcbddc->vec1_P);
8748: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8749: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8750: if (coarse_mat) {
8751: Vec nullv;
8752: PetscScalar *array,*array2;
8753: PetscInt nl;
8755: MatCreateVecs(coarse_mat,&nullv,NULL);
8756: VecGetLocalSize(nullv,&nl);
8757: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8758: VecGetArray(nullv,&array2);
8759: PetscArraycpy(array2,array,nl);
8760: VecRestoreArray(nullv,&array2);
8761: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8762: VecNormalize(nullv,NULL);
8763: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8764: VecDestroy(&nullv);
8765: }
8766: }
8767: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8769: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8770: if (pcbddc->coarse_ksp) {
8771: PetscBool ispreonly;
8773: if (CoarseNullSpace) {
8774: PetscBool isnull;
8775: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8776: if (isnull) {
8777: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8778: }
8779: /* TODO: add local nullspaces (if any) */
8780: }
8781: /* setup coarse ksp */
8782: KSPSetUp(pcbddc->coarse_ksp);
8783: /* Check coarse problem if in debug mode or if solving with an iterative method */
8784: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8785: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8786: KSP check_ksp;
8787: KSPType check_ksp_type;
8788: PC check_pc;
8789: Vec check_vec,coarse_vec;
8790: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8791: PetscInt its;
8792: PetscBool compute_eigs;
8793: PetscReal *eigs_r,*eigs_c;
8794: PetscInt neigs;
8795: const char *prefix;
8797: /* Create ksp object suitable for estimation of extreme eigenvalues */
8798: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8799: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8800: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8801: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8802: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8803: /* prevent from setup unneeded object */
8804: KSPGetPC(check_ksp,&check_pc);
8805: PCSetType(check_pc,PCNONE);
8806: if (ispreonly) {
8807: check_ksp_type = KSPPREONLY;
8808: compute_eigs = PETSC_FALSE;
8809: } else {
8810: check_ksp_type = KSPGMRES;
8811: compute_eigs = PETSC_TRUE;
8812: }
8813: KSPSetType(check_ksp,check_ksp_type);
8814: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8815: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8816: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8817: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8818: KSPSetOptionsPrefix(check_ksp,prefix);
8819: KSPAppendOptionsPrefix(check_ksp,"check_");
8820: KSPSetFromOptions(check_ksp);
8821: KSPSetUp(check_ksp);
8822: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8823: KSPSetPC(check_ksp,check_pc);
8824: /* create random vec */
8825: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8826: VecSetRandom(check_vec,NULL);
8827: MatMult(coarse_mat,check_vec,coarse_vec);
8828: /* solve coarse problem */
8829: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8830: KSPCheckSolve(check_ksp,pc,coarse_vec);
8831: /* set eigenvalue estimation if preonly has not been requested */
8832: if (compute_eigs) {
8833: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8834: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8835: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8836: if (neigs) {
8837: lambda_max = eigs_r[neigs-1];
8838: lambda_min = eigs_r[0];
8839: if (pcbddc->use_coarse_estimates) {
8840: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8841: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8842: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8843: }
8844: }
8845: }
8846: }
8848: /* check coarse problem residual error */
8849: if (pcbddc->dbg_flag) {
8850: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8851: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8852: VecAXPY(check_vec,-1.0,coarse_vec);
8853: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8854: MatMult(coarse_mat,check_vec,coarse_vec);
8855: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8856: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8857: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8858: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8859: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8860: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8861: if (CoarseNullSpace) {
8862: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8863: }
8864: if (compute_eigs) {
8865: PetscReal lambda_max_s,lambda_min_s;
8866: KSPConvergedReason reason;
8867: KSPGetType(check_ksp,&check_ksp_type);
8868: KSPGetIterationNumber(check_ksp,&its);
8869: KSPGetConvergedReason(check_ksp,&reason);
8870: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8871: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8872: for (i=0;i<neigs;i++) {
8873: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8874: }
8875: }
8876: PetscViewerFlush(dbg_viewer);
8877: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8878: }
8879: VecDestroy(&check_vec);
8880: VecDestroy(&coarse_vec);
8881: KSPDestroy(&check_ksp);
8882: if (compute_eigs) {
8883: PetscFree(eigs_r);
8884: PetscFree(eigs_c);
8885: }
8886: }
8887: }
8888: MatNullSpaceDestroy(&CoarseNullSpace);
8889: /* print additional info */
8890: if (pcbddc->dbg_flag) {
8891: /* waits until all processes reaches this point */
8892: PetscBarrier((PetscObject)pc);
8893: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8894: PetscViewerFlush(pcbddc->dbg_viewer);
8895: }
8897: /* free memory */
8898: MatDestroy(&coarse_mat);
8899: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8900: return(0);
8901: }
/*
   PCBDDCComputePrimalNumbering - Computes a global numbering of the coarse (primal) dofs.

   Input Parameter:
.  pc - the BDDC preconditioner; pc->data is read both as PC_BDDC and as PC_IS,
        and pc->pmat->data is read as Mat_IS

   Output Parameters:
+  coarse_size_n          - the size returned by ISRenumber() for the renumbered primal set
-  local_primal_indices_n - array of pcbddc->local_primal_size entries allocated here with
                            PetscMalloc1(), holding the renumbered index of each local primal dof;
                            ownership passes to the caller (presumably released with PetscFree() -
                            the caller is not visible here, TODO confirm)

   Notes:
   Raises PETSC_ERR_PLIB if local primal dofs exist but local_primal_ref_node is not set, if the
   number of renumbered indices differs from local_primal_size, or (debug mode only) if the
   consistency check on the primal dofs fails on any process.

   When pcbddc->dbg_flag is set, additional collective checks are run and their outcome is printed
   on pcbddc->dbg_viewer.
*/
8903: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8904: {
8905: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8906: PC_IS* pcis = (PC_IS*)pc->data;
8907: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8908: IS subset,subset_mult,subset_n;
8909: PetscInt local_size,coarse_size=0;
8910: PetscInt *local_primal_indices=NULL;
8911: const PetscInt *t_local_primal_indices;
8915: /* Compute global number of coarse dofs */
8916: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
/* map the local reference nodes to global indices through pcis->mapping */
8917: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8918: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8919: ISDestroy(&subset_n);
/* renumber the global reference nodes; local_primal_ref_mult is handed to ISRenumber() as the
   per-entry multiplicity (presumably the number of primal dofs attached to each reference node) */
8920: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8921: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8922: ISDestroy(&subset);
8923: ISDestroy(&subset_mult);
8924: ISGetLocalSize(subset_n,&local_size);
8925: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
/* copy the renumbered indices into a freshly allocated array that outlives the IS */
8926: PetscMalloc1(local_size,&local_primal_indices);
8927: ISGetIndices(subset_n,&t_local_primal_indices);
8928: PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8929: ISRestoreIndices(subset_n,&t_local_primal_indices);
8930: ISDestroy(&subset_n);
8932: /* check numbering */
8933: if (pcbddc->dbg_flag) {
8934: PetscScalar coarsesum,*array,*array2;
8935: PetscInt i;
8936: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8938: PetscViewerFlush(pcbddc->dbg_viewer);
8939: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8940: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
/* enable synchronized printing; balanced by the PopSynchronized call at the end of this branch */
8941: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8942: /* counter */
/* vec2_N receives, for every local dof, the accumulated count of local contributions (all ones) */
8943: VecSet(pcis->vec1_global,0.0);
8944: VecSet(pcis->vec1_N,1.0);
8945: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8946: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8947: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8948: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
/* same accumulation, but only the local primal dofs contribute a one */
8949: VecSet(pcis->vec1_N,0.0);
8950: for (i=0;i<pcbddc->local_primal_size;i++) {
8951: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8952: }
8953: VecAssemblyBegin(pcis->vec1_N);
8954: VecAssemblyEnd(pcis->vec1_N);
8955: VecSet(pcis->vec1_global,0.0);
8956: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8957: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8958: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8959: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8960: VecGetArray(pcis->vec1_N,&array);
8961: VecGetArray(pcis->vec2_N,&array2);
/* a dof flagged as primal somewhere must carry the same count in both vectors, otherwise report it */
8962: for (i=0;i<pcis->n;i++) {
8963: if (array[i] != 0.0 && array[i] != array2[i]) {
8964: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8965: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8966: set_error = PETSC_TRUE;
8967: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8968: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8969: }
8970: }
8971: VecRestoreArray(pcis->vec2_N,&array2);
/* the failure flag is OR-reduced over the communicator of pc so that every process takes the same path */
8972: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8973: PetscViewerFlush(pcbddc->dbg_viewer);
/* invert the positive counts so that the contributions added below for a marked dof are expected to sum to one */
8974: for (i=0;i<pcis->n;i++) {
8975: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8976: }
8977: VecRestoreArray(pcis->vec1_N,&array);
8978: VecSet(pcis->vec1_global,0.0);
8979: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8980: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8981: VecSum(pcis->vec1_global,&coarsesum);
/* PetscReal is cast to double: the %lf conversion is only valid for a double argument */
8982: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum));
8983: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8984: PetscInt *gidxs;
/* dump, for each local primal dof: coarse index, local index and global index */
8986: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8987: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8988: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8989: PetscViewerFlush(pcbddc->dbg_viewer);
8990: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8991: for (i=0;i<pcbddc->local_primal_size;i++) {
8992: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8993: }
8994: PetscViewerFlush(pcbddc->dbg_viewer);
8995: PetscFree(gidxs);
8996: }
8997: PetscViewerFlush(pcbddc->dbg_viewer);
/* undo the PushSynchronized issued at the top of this branch (the original pushed a second time here,
   leaving the viewer's synchronized state unbalanced after every call) */
8998: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8999: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
9000: }
9002: /* get back data */
9003: *coarse_size_n = coarse_size;
9004: *local_primal_indices_n = local_primal_indices;
9005: return(0);
9006: }
9008: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
9009: {
9010: IS localis_t;
9011: PetscInt i,lsize,*idxs,n;
9012: PetscScalar *vals;
9016: /* get indices in local ordering exploiting local to global map */
9017: ISGetLocalSize(globalis,&lsize);
9018: PetscMalloc1(lsize,&vals);
9019: for (i=0;i<lsize;i++) vals[i] = 1.0;
9020: ISGetIndices(globalis,(const PetscInt**)&idxs);
9021: VecSet(gwork,0.0);
9022: VecSet(lwork,0.0);
9023: if (idxs) { /* multilevel guard */
9024: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
9025: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
9026: }
9027: VecAssemblyBegin(gwork);
9028: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
9029: PetscFree(vals);
9030: VecAssemblyEnd(gwork);
9031: /* now compute set in local ordering */
9032: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9033: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9034: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
9035: VecGetSize(lwork,&n);
9036: for (i=0,lsize=0;i<n;i++) {
9037: if (PetscRealPart(vals[i]) > 0.5) {
9038: lsize++;
9039: }
9040: }
9041: PetscMalloc1(lsize,&idxs);
9042: for (i=0,lsize=0;i<n;i++) {
9043: if (PetscRealPart(vals[i]) > 0.5) {
9044: idxs[lsize++] = i;
9045: }
9046: }
9047: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
9048: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
9049: *localis = localis_t;
9050: return(0);
9051: }
9053: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9054: {
9055: PC_IS *pcis=(PC_IS*)pc->data;
9056: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9057: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
9058: Mat S_j;
9059: PetscInt *used_xadj,*used_adjncy;
9060: PetscBool free_used_adj;
9061: PetscErrorCode ierr;
9064: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9065: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9066: free_used_adj = PETSC_FALSE;
9067: if (pcbddc->sub_schurs_layers == -1) {
9068: used_xadj = NULL;
9069: used_adjncy = NULL;
9070: } else {
9071: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9072: used_xadj = pcbddc->mat_graph->xadj;
9073: used_adjncy = pcbddc->mat_graph->adjncy;
9074: } else if (pcbddc->computed_rowadj) {
9075: used_xadj = pcbddc->mat_graph->xadj;
9076: used_adjncy = pcbddc->mat_graph->adjncy;
9077: } else {
9078: PetscBool flg_row=PETSC_FALSE;
9079: const PetscInt *xadj,*adjncy;
9080: PetscInt nvtxs;
9082: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9083: if (flg_row) {
9084: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9085: PetscArraycpy(used_xadj,xadj,nvtxs+1);
9086: PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9087: free_used_adj = PETSC_TRUE;
9088: } else {
9089: pcbddc->sub_schurs_layers = -1;
9090: used_xadj = NULL;
9091: used_adjncy = NULL;
9092: }
9093: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9094: }
9095: }
9097: /* setup sub_schurs data */
9098: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9099: if (!sub_schurs->schur_explicit) {
9100: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9101: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9102: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9103: } else {
9104: Mat change = NULL;
9105: Vec scaling = NULL;
9106: IS change_primal = NULL, iP;
9107: PetscInt benign_n;
9108: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9109: PetscBool need_change = PETSC_FALSE;
9110: PetscBool discrete_harmonic = PETSC_FALSE;
9112: if (!pcbddc->use_vertices && reuse_solvers) {
9113: PetscInt n_vertices;
9115: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9116: reuse_solvers = (PetscBool)!n_vertices;
9117: }
9118: if (!pcbddc->benign_change_explicit) {
9119: benign_n = pcbddc->benign_n;
9120: } else {
9121: benign_n = 0;
9122: }
9123: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9124: We need a global reduction to avoid possible deadlocks.
9125: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9126: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9127: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9128: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9129: need_change = (PetscBool)(!need_change);
9130: }
9131: /* If the user defines additional constraints, we import them here.
9132: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9133: if (need_change) {
9134: PC_IS *pcisf;
9135: PC_BDDC *pcbddcf;
9136: PC pcf;
9138: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9139: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9140: PCSetOperators(pcf,pc->mat,pc->pmat);
9141: PCSetType(pcf,PCBDDC);
9143: /* hacks */
9144: pcisf = (PC_IS*)pcf->data;
9145: pcisf->is_B_local = pcis->is_B_local;
9146: pcisf->vec1_N = pcis->vec1_N;
9147: pcisf->BtoNmap = pcis->BtoNmap;
9148: pcisf->n = pcis->n;
9149: pcisf->n_B = pcis->n_B;
9150: pcbddcf = (PC_BDDC*)pcf->data;
9151: PetscFree(pcbddcf->mat_graph);
9152: pcbddcf->mat_graph = pcbddc->mat_graph;
9153: pcbddcf->use_faces = PETSC_TRUE;
9154: pcbddcf->use_change_of_basis = PETSC_TRUE;
9155: pcbddcf->use_change_on_faces = PETSC_TRUE;
9156: pcbddcf->use_qr_single = PETSC_TRUE;
9157: pcbddcf->fake_change = PETSC_TRUE;
9159: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9160: PCBDDCConstraintsSetUp(pcf);
9161: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9162: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9163: change = pcbddcf->ConstraintMatrix;
9164: pcbddcf->ConstraintMatrix = NULL;
9166: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9167: PetscFree(pcbddcf->sub_schurs);
9168: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9169: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9170: PetscFree(pcbddcf->primal_indices_local_idxs);
9171: PetscFree(pcbddcf->onearnullvecs_state);
9172: PetscFree(pcf->data);
9173: pcf->ops->destroy = NULL;
9174: pcf->ops->reset = NULL;
9175: PCDestroy(&pcf);
9176: }
9177: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9179: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9180: if (iP) {
9181: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9182: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9183: PetscOptionsEnd();
9184: }
9185: if (discrete_harmonic) {
9186: Mat A;
9187: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9188: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9189: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9190: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9191: MatDestroy(&A);
9192: } else {
9193: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9194: }
9195: MatDestroy(&change);
9196: ISDestroy(&change_primal);
9197: }
9198: MatDestroy(&S_j);
9200: /* free adjacency */
9201: if (free_used_adj) {
9202: PetscFree2(used_xadj,used_adjncy);
9203: }
9204: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9205: return(0);
9206: }
9208: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9209: {
9210: PC_IS *pcis=(PC_IS*)pc->data;
9211: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9212: PCBDDCGraph graph;
9213: PetscErrorCode ierr;
9216: /* attach interface graph for determining subsets */
9217: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9218: IS verticesIS,verticescomm;
9219: PetscInt vsize,*idxs;
9221: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9222: ISGetSize(verticesIS,&vsize);
9223: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9224: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9225: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9226: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9227: PCBDDCGraphCreate(&graph);
9228: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9229: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9230: ISDestroy(&verticescomm);
9231: PCBDDCGraphComputeConnectedComponents(graph);
9232: } else {
9233: graph = pcbddc->mat_graph;
9234: }
9235: /* print some info */
9236: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9237: IS vertices;
9238: PetscInt nv,nedges,nfaces;
9239: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9240: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9241: ISGetSize(vertices,&nv);
9242: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9243: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9244: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9245: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9246: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9247: PetscViewerFlush(pcbddc->dbg_viewer);
9248: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9249: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9250: }
9252: /* sub_schurs init */
9253: if (!pcbddc->sub_schurs) {
9254: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9255: }
9256: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9258: /* free graph struct */
9259: if (pcbddc->sub_schurs_rebuild) {
9260: PCBDDCGraphDestroy(&graph);
9261: }
9262: return(0);
9263: }
9265: PetscErrorCode PCBDDCCheckOperator(PC pc)
9266: {
9267: PC_IS *pcis=(PC_IS*)pc->data;
9268: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9269: PetscErrorCode ierr;
9272: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9273: IS zerodiag = NULL;
9274: Mat S_j,B0_B=NULL;
9275: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9276: PetscScalar *p0_check,*array,*array2;
9277: PetscReal norm;
9278: PetscInt i;
9280: /* B0 and B0_B */
9281: if (zerodiag) {
9282: IS dummy;
9284: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9285: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9286: MatCreateVecs(B0_B,NULL,&dummy_vec);
9287: ISDestroy(&dummy);
9288: }
9289: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9290: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9291: VecSet(pcbddc->vec1_P,1.0);
9292: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9293: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9294: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9295: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9296: VecReciprocal(vec_scale_P);
9297: /* S_j */
9298: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9299: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9301: /* mimic vector in \widetilde{W}_\Gamma */
9302: VecSetRandom(pcis->vec1_N,NULL);
9303: /* continuous in primal space */
9304: VecSetRandom(pcbddc->coarse_vec,NULL);
9305: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9306: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9307: VecGetArray(pcbddc->vec1_P,&array);
9308: PetscCalloc1(pcbddc->benign_n,&p0_check);
9309: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9310: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9311: VecRestoreArray(pcbddc->vec1_P,&array);
9312: VecAssemblyBegin(pcis->vec1_N);
9313: VecAssemblyEnd(pcis->vec1_N);
9314: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9315: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9316: VecDuplicate(pcis->vec2_B,&vec_check_B);
9317: VecCopy(pcis->vec2_B,vec_check_B);
9319: /* assemble rhs for coarse problem */
9320: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9321: /* local with Schur */
9322: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9323: if (zerodiag) {
9324: VecGetArray(dummy_vec,&array);
9325: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9326: VecRestoreArray(dummy_vec,&array);
9327: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9328: }
9329: /* sum on primal nodes the local contributions */
9330: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9331: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9332: VecGetArray(pcis->vec1_N,&array);
9333: VecGetArray(pcbddc->vec1_P,&array2);
9334: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9335: VecRestoreArray(pcbddc->vec1_P,&array2);
9336: VecRestoreArray(pcis->vec1_N,&array);
9337: VecSet(pcbddc->coarse_vec,0.);
9338: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9339: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9340: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9341: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9342: VecGetArray(pcbddc->vec1_P,&array);
9343: /* scale primal nodes (BDDC sums contibutions) */
9344: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9345: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9346: VecRestoreArray(pcbddc->vec1_P,&array);
9347: VecAssemblyBegin(pcis->vec1_N);
9348: VecAssemblyEnd(pcis->vec1_N);
9349: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9350: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9351: /* global: \widetilde{B0}_B w_\Gamma */
9352: if (zerodiag) {
9353: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9354: VecGetArray(dummy_vec,&array);
9355: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9356: VecRestoreArray(dummy_vec,&array);
9357: }
9358: /* BDDC */
9359: VecSet(pcis->vec1_D,0.);
9360: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9362: VecCopy(pcis->vec1_B,pcis->vec2_B);
9363: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9364: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9365: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9366: for (i=0;i<pcbddc->benign_n;i++) {
9367: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9368: }
9369: PetscFree(p0_check);
9370: VecDestroy(&vec_scale_P);
9371: VecDestroy(&vec_check_B);
9372: VecDestroy(&dummy_vec);
9373: MatDestroy(&S_j);
9374: MatDestroy(&B0_B);
9375: }
9376: return(0);
9377: }
9379: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9380: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9381: {
9382: Mat At;
9383: IS rows;
9384: PetscInt rst,ren;
9386: PetscLayout rmap;
9389: rst = ren = 0;
9390: if (ccomm != MPI_COMM_NULL) {
9391: PetscLayoutCreate(ccomm,&rmap);
9392: PetscLayoutSetSize(rmap,A->rmap->N);
9393: PetscLayoutSetBlockSize(rmap,1);
9394: PetscLayoutSetUp(rmap);
9395: PetscLayoutGetRange(rmap,&rst,&ren);
9396: }
9397: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9398: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9399: ISDestroy(&rows);
9401: if (ccomm != MPI_COMM_NULL) {
9402: Mat_MPIAIJ *a,*b;
9403: IS from,to;
9404: Vec gvec;
9405: PetscInt lsize;
9407: MatCreate(ccomm,B);
9408: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9409: MatSetType(*B,MATAIJ);
9410: PetscLayoutDestroy(&((*B)->rmap));
9411: PetscLayoutSetUp((*B)->cmap);
9412: a = (Mat_MPIAIJ*)At->data;
9413: b = (Mat_MPIAIJ*)(*B)->data;
9414: MPI_Comm_size(ccomm,&b->size);
9415: MPI_Comm_rank(ccomm,&b->rank);
9416: PetscObjectReference((PetscObject)a->A);
9417: PetscObjectReference((PetscObject)a->B);
9418: b->A = a->A;
9419: b->B = a->B;
9421: b->donotstash = a->donotstash;
9422: b->roworiented = a->roworiented;
9423: b->rowindices = 0;
9424: b->rowvalues = 0;
9425: b->getrowactive = PETSC_FALSE;
9427: (*B)->rmap = rmap;
9428: (*B)->factortype = A->factortype;
9429: (*B)->assembled = PETSC_TRUE;
9430: (*B)->insertmode = NOT_SET_VALUES;
9431: (*B)->preallocated = PETSC_TRUE;
9433: if (a->colmap) {
9434: #if defined(PETSC_USE_CTABLE)
9435: PetscTableCreateCopy(a->colmap,&b->colmap);
9436: #else
9437: PetscMalloc1(At->cmap->N,&b->colmap);
9438: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9439: PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9440: #endif
9441: } else b->colmap = 0;
9442: if (a->garray) {
9443: PetscInt len;
9444: len = a->B->cmap->n;
9445: PetscMalloc1(len+1,&b->garray);
9446: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9447: if (len) { PetscArraycpy(b->garray,a->garray,len); }
9448: } else b->garray = 0;
9450: PetscObjectReference((PetscObject)a->lvec);
9451: b->lvec = a->lvec;
9452: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9454: /* cannot use VecScatterCopy */
9455: VecGetLocalSize(b->lvec,&lsize);
9456: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9457: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9458: MatCreateVecs(*B,&gvec,NULL);
9459: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9460: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9461: ISDestroy(&from);
9462: ISDestroy(&to);
9463: VecDestroy(&gvec);
9464: }
9465: MatDestroy(&At);
9466: return(0);
9467: }