Actual source code: bddcschurs.c
petsc-3.7.1 2016-05-15
#include <../src/ksp/pc/impls/bddc/bddc.h>
#include <../src/ksp/pc/impls/bddc/bddcprivate.h>
#include <petscblaslapack.h>

PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);
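/* Note on the two MUMPS-based shell solves below (an editorial, hedged
   reading of the PETSc MUMPS interface, not authoritative documentation):
   when a Schur complement has been requested at factorization time,
   ICNTL(26) selects the solution phase. ICNTL(26)=0 appears to solve the
   interior (non-Schur) problem only, while the negative value used by the
   correction solve makes MatSolve perform a solve of the complete problem,
   reusing the factored Schur complement for the interface variables. */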
static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
{
  PCBDDCReuseMumps ctx;
#if defined(PETSC_HAVE_MUMPS)
  PetscInt         ival;
#endif

  PCShellGetContext(pc,(void **)&ctx);
#if defined(PETSC_HAVE_MUMPS)
  MatMumpsGetIcntl(ctx->F,26,&ival);
  MatMumpsSetIcntl(ctx->F,26,-1);
#endif
  if (transpose) {
    MatSolveTranspose(ctx->F,rhs,sol);
  } else {
    MatSolve(ctx->F,rhs,sol);
  }
#if defined(PETSC_HAVE_MUMPS)
  MatMumpsSetIcntl(ctx->F,26,ival);
#endif
  return(0);
}
static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
{
  PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
  return(0);
}

static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
{
  PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
  return(0);
}
static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
{
  MatDestroy(&reuse->F);
  VecDestroy(&reuse->sol);
  VecDestroy(&reuse->rhs);
  PCDestroy(&reuse->interior_solver);
  PCDestroy(&reuse->correction_solver);
  ISDestroy(&reuse->is_R);
  ISDestroy(&reuse->is_B);
  VecScatterDestroy(&reuse->correction_scatter_B);
  VecDestroy(&reuse->sol_B);
  VecDestroy(&reuse->rhs_B);
  return(0);
}
static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
{
  PCBDDCReuseMumps ctx;
  PetscScalar      *array,*array_mumps;
#if defined(PETSC_HAVE_MUMPS)
  PetscInt         ival;
#endif

  PCShellGetContext(pc,(void **)&ctx);
#if defined(PETSC_HAVE_MUMPS)
  MatMumpsGetIcntl(ctx->F,26,&ival);
  MatMumpsSetIcntl(ctx->F,26,0);
#endif
  /* copy rhs into the factored matrix workspace (can this copy be avoided? MatSolve_MUMPS makes another copy b->x internally) */
  VecGetArrayRead(rhs,(const PetscScalar**)&array);
  VecGetArray(ctx->rhs,&array_mumps);
  PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
  VecRestoreArray(ctx->rhs,&array_mumps);
  VecRestoreArrayRead(rhs,(const PetscScalar**)&array);

  if (transpose) {
    MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
  } else {
    MatSolve(ctx->F,ctx->rhs,ctx->sol);
  }

  /* copy the data back to the caller's workspace */
  VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
  VecGetArray(sol,&array);
  PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
  VecRestoreArray(sol,&array);
  VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
#if defined(PETSC_HAVE_MUMPS)
  MatMumpsSetIcntl(ctx->F,26,ival);
#endif
  return(0);
}
static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
{
  PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
  return(0);
}

static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
{
  PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
  return(0);
}
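/* Editorial recap of the routine below: M is a MATSCHURCOMPLEMENT with
   blocks [A_II B; C D]. The code forms A_II^{-1}*B (via MatMatSolve on the
   factored A_II, an explicit inverse of the preconditioner, or
   column-by-column KSP solves), multiplies by C (or by B^T in the symmetric
   case), and finally flips the sign and adds D, so that on exit
   S = D - C*A_II^{-1}*B, the standard Schur complement. */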
static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
{
  Mat         B, C, D, Bd, Cd, AinvBd;
  KSP         ksp;
  PC          pc;
  PetscBool   isLU, isILU, isCHOL, Bdense, Cdense;
  PetscReal   fill = 2.0;
  PetscInt    n_I;
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
  if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
  if (reuse == MAT_REUSE_MATRIX) {
    PetscBool Sdense;

    PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
    if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should be dense");
  }
  MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
  MatSchurComplementGetKSP(M, &ksp);
  KSPGetPC(ksp, &pc);
  PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
  PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
  PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
  PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
  PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
  MatGetSize(B,&n_I,NULL);
  if (n_I) {
    if (!Bdense) {
      MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
    } else {
      Bd = B;
    }

    if (isLU || isILU || isCHOL) {
      Mat fact;
      KSPSetUp(ksp);
      PCFactorGetMatrix(pc, &fact);
      MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
      MatMatSolve(fact, Bd, AinvBd);
    } else {
      PetscBool ex = PETSC_TRUE;

      if (ex) {
        Mat Ainvd;

        PCComputeExplicitOperator(pc, &Ainvd);
        MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
        MatDestroy(&Ainvd);
      } else {
        Vec         sol,rhs;
        PetscScalar *arrayrhs,*arraysol;
        PetscInt    i,nrhs,n;

        MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
        MatGetSize(Bd,&n,&nrhs);
        MatDenseGetArray(Bd,&arrayrhs);
        MatDenseGetArray(AinvBd,&arraysol);
        KSPGetSolution(ksp,&sol);
        KSPGetRhs(ksp,&rhs);
        for (i=0;i<nrhs;i++) {
          VecPlaceArray(rhs,arrayrhs+i*n);
          VecPlaceArray(sol,arraysol+i*n);
          KSPSolve(ksp,rhs,sol);
          VecResetArray(rhs);
          VecResetArray(sol);
        }
        MatDenseRestoreArray(Bd,&arrayrhs);
        MatDenseRestoreArray(AinvBd,&arraysol);
      }
    }
    if (!Bdense && !issym) {
      MatDestroy(&Bd);
    }

    if (!issym) {
      if (!Cdense) {
        MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
      } else {
        Cd = C;
      }
      MatMatMult(Cd, AinvBd, reuse, fill, S);
      if (!Cdense) {
        MatDestroy(&Cd);
      }
    } else {
      MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
      if (!Bdense) {
        MatDestroy(&Bd);
      }
    }
    MatDestroy(&AinvBd);
  }

  if (D) {
    Mat       Dd;
    PetscBool Ddense;

    PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
    if (!Ddense) {
      MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
    } else {
      Dd = D;
    }
    if (n_I) {
      MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
    } else {
      if (reuse == MAT_INITIAL_MATRIX) {
        MatDuplicate(Dd,MAT_COPY_VALUES,S);
      } else {
        MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
      }
    }
    if (!Ddense) {
      MatDestroy(&Dd);
    }
  } else {
    MatScale(*S,-1.0);
  }
  return(0);
}
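/* Editorial road map for PCBDDCSubSchursSetUp (a hedged summary of the code
   that follows, not authoritative documentation). The routine proceeds in
   broad phases: (1) detect symmetry/definiteness of Ain, possibly with a
   cheap randomized test; (2) restrict work to the processes that own
   subsets; (3) optionally shrink the interior problem to nlayers adjacency
   layers around the interface; (4) compute the local Schur complements on
   each subset, either with explicit KSP-based elimination or through the
   MUMPS Schur complement feature; (5) assemble the subset contributions
   into parallel matrices and extract the locally needed sums; (6) if
   requested, invert blocks for the adaptive (Stilda) and faster-deluxe
   variants. */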
PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
{
  Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
  Mat                    S_all;
  Mat                    global_schur_subsets,work_mat,*submats;
  ISLocalToGlobalMapping l2gmap_subsets;
  IS                     is_I,is_I_layer;
  IS                     all_subsets,all_subsets_mult,all_subsets_n;
  PetscInt               *nnz,*all_local_idx_N;
  PetscInt               *auxnum1,*auxnum2;
  PetscInt               i,subset_size,max_subset_size;
  PetscInt               extra,local_size,global_size;
  PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
  PetscScalar            *Bwork;
  PetscSubcomm           subcomm;
  PetscMPIInt            color,rank;
  MPI_Comm               comm_n;
  /* update info in sub_schurs */
  MatDestroy(&sub_schurs->A);
  MatDestroy(&sub_schurs->S);
  sub_schurs->is_hermitian = PETSC_FALSE;
  sub_schurs->is_posdef    = PETSC_FALSE;
  if (Ain) {
    PetscBool isseqaij;
    /* determine if we are dealing with hermitian positive definite problems */
#if !defined(PETSC_USE_COMPLEX)
    if (Ain->symmetric_set) {
      sub_schurs->is_hermitian = Ain->symmetric;
    }
#else
    if (Ain->hermitian_set) {
      sub_schurs->is_hermitian = Ain->hermitian;
    }
#endif
    if (Ain->spd_set) {
      sub_schurs->is_posdef = Ain->spd;
    }

    /* check */
    PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
    if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
      PetscInt lsize;

      MatGetSize(Ain,&lsize,NULL);
      if (lsize) {
        PetscScalar val;
        PetscReal   norm;
        Vec         vec1,vec2,vec3;

        MatCreateVecs(Ain,&vec1,&vec2);
        VecDuplicate(vec1,&vec3);
        VecSetRandom(vec1,NULL);
        MatMult(Ain,vec1,vec2);
#if !defined(PETSC_USE_COMPLEX)
        MatMultTranspose(Ain,vec1,vec3);
#else
        MatMultHermitianTranspose(Ain,vec1,vec3);
#endif
        VecAXPY(vec3,-1.0,vec2);
        VecNorm(vec3,NORM_INFINITY,&norm);
        if (norm > PetscSqrtReal(PETSC_SMALL)) {
          sub_schurs->is_hermitian = PETSC_FALSE;
        } else {
          sub_schurs->is_hermitian = PETSC_TRUE;
        }
        VecDot(vec1,vec2,&val);
        if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
        VecDestroy(&vec1);
        VecDestroy(&vec2);
        VecDestroy(&vec3);
      } else {
        sub_schurs->is_hermitian = PETSC_TRUE;
        sub_schurs->is_posdef    = PETSC_TRUE;
      }
    }
    if (isseqaij) {
      PetscObjectReference((PetscObject)Ain);
      sub_schurs->A = Ain;
    } else {
      MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
    }
  }
  if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%D,%D)",sub_schurs->is_hermitian,sub_schurs->is_posdef);

  PetscObjectReference((PetscObject)Sin);
  sub_schurs->S = Sin;
  if (sub_schurs->use_mumps) {
    sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
  }
  /* preliminary checks */
  if (!sub_schurs->use_mumps && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");

  /* restrict work to active processes */
  color = 0;
  if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
  MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
  PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
  PetscSubcommSetNumber(subcomm,2);
  PetscSubcommSetTypeGeneral(subcomm,color,rank);
  PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
  PetscSubcommDestroy(&subcomm);
  if (!sub_schurs->n_subs) {
    PetscCommDestroy(&comm_n);
    return(0);
  }
  /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */
  /* get Schur complement matrices */
  if (!sub_schurs->use_mumps) {
    Mat       tA_IB,tA_BI,tA_BB;
    PetscBool isseqsbaij;

    MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
    PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
    if (isseqsbaij) {
      MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
      MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
      MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
    } else {
      PetscObjectReference((PetscObject)tA_BB);
      A_BB = tA_BB;
      PetscObjectReference((PetscObject)tA_IB);
      A_IB = tA_IB;
      PetscObjectReference((PetscObject)tA_BI);
      A_BI = tA_BI;
    }
  } else {
    A_II = NULL;
    A_IB = NULL;
    A_BI = NULL;
    A_BB = NULL;
  }
  S_all = NULL;
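  /* Editorial note (hedged): when nlayers >= 0 and adjacency information is
     available, the interior problem used to build the local Schur
     complements is restricted to the dofs reachable from the interface
     within nlayers layers of the adjacency graph, an "economic" variant
     that shrinks the factorization at the price of approximating the true
     Schur complements. */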
  /* determine interior problems */
  ISGetLocalSize(sub_schurs->is_I,&i);
  if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
    PetscBT        touched;
    const PetscInt *idx_B;
    PetscInt       n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;

    if (!xadj || !adjncy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
    /* get sizes */
    ISGetLocalSize(sub_schurs->is_I,&n_I);
    ISGetLocalSize(sub_schurs->is_B,&n_B);

    PetscMalloc1(n_I+n_B,&local_numbering);
    PetscBTCreate(n_I+n_B,&touched);
    PetscBTMemzero(n_I+n_B,touched);

    /* all boundary dofs must be skipped when adding layers */
    ISGetIndices(sub_schurs->is_B,&idx_B);
    for (j=0;j<n_B;j++) {
      PetscBTSet(touched,idx_B[j]);
    }
    PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
    ISRestoreIndices(sub_schurs->is_B,&idx_B);

    /* add prescribed number of layers of dofs */
    n_local_dofs = n_B;
    n_prev_added = n_B;
    for (layer=0;layer<nlayers;layer++) {
      PetscInt n_added;
      if (n_local_dofs == n_I+n_B) break;
      if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
      PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
      n_prev_added = n_added;
      n_local_dofs += n_added;
      if (!n_added) break;
    }
    PetscBTDestroy(&touched);

    /* IS for I layer dofs in original numbering */
    ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
    PetscFree(local_numbering);
    ISSort(is_I_layer);
    /* IS for I layer dofs in I numbering */
    if (!sub_schurs->use_mumps) {
      ISLocalToGlobalMapping ItoNmap;
      ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
      ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
      ISLocalToGlobalMappingDestroy(&ItoNmap);

      /* II block */
      MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
    }
  } else {
    PetscInt n_I;

    /* IS for I dofs in original numbering */
    PetscObjectReference((PetscObject)sub_schurs->is_I);
    is_I_layer = sub_schurs->is_I;

    /* IS for I dofs in I numbering (strided 1) */
    if (!sub_schurs->use_mumps) {
      ISGetSize(sub_schurs->is_I,&n_I);
      ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);

      /* II block is the same */
      PetscObjectReference((PetscObject)A_II);
      AE_II = A_II;
    }
  }
  /* get info on subset sizes and the sum of all subset sizes */
  max_subset_size = 0;
  local_size      = 0;
  for (i=0;i<sub_schurs->n_subs;i++) {
    ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
    max_subset_size = PetscMax(subset_size,max_subset_size);
    local_size += subset_size;
  }

  /* work arrays for local indices */
  extra = 0;
  if (sub_schurs->use_mumps && is_I_layer) {
    ISGetLocalSize(is_I_layer,&extra);
  }
  PetscMalloc1(local_size+extra,&all_local_idx_N);
  if (extra) {
    const PetscInt *idxs;
    ISGetIndices(is_I_layer,&idxs);
    PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
    ISRestoreIndices(is_I_layer,&idxs);
  }
  PetscMalloc1(local_size,&nnz);
  PetscMalloc1(sub_schurs->n_subs,&auxnum1);
  PetscMalloc1(sub_schurs->n_subs,&auxnum2);

  /* get local indices in local numbering */
  local_size = 0;
  for (i=0;i<sub_schurs->n_subs;i++) {
    PetscInt       j;
    const PetscInt *idxs;

    ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
    ISGetIndices(sub_schurs->is_subs[i],&idxs);
    /* start (smallest in global ordering) and multiplicity */
    auxnum1[i] = idxs[0];
    auxnum2[i] = subset_size;
    /* subset indices in local numbering */
    PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
    ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
    for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
    local_size += subset_size;
  }
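  /* Editorial note: the GETRI call below with B_lwork = -1 follows the
     standard LAPACK workspace-query convention: the routine performs no
     inversion and instead returns the optimal workspace size in the scalar
     lwork argument, which is then used to size Bwork. */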
  /* allocate extra workspace needed only for GETRI */
  Bwork  = NULL;
  pivots = NULL;
  if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
    PetscScalar lwork;

    B_lwork = -1;
    PetscBLASIntCast(local_size,&B_N);
    PetscFPTrapPush(PETSC_FP_TRAP_OFF);
    PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
    PetscFPTrapPop();
    if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
    PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
    PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
  }
  /* prepare parallel matrices for properly summing up Schur complements on subsets */
  ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
  ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
  ISDestroy(&all_subsets_n);
  ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
  PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
  ISDestroy(&all_subsets);
  ISDestroy(&all_subsets_mult);
  ISGetLocalSize(all_subsets_n,&i);
  if (i != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_size);
  ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
  MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);
  ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
  MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
  MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
  MatSetType(global_schur_subsets,MATMPIAIJ);

  /* subset indices in local boundary numbering */
  if (!sub_schurs->is_Ej_all) {
    PetscInt *all_local_idx_B;

    PetscMalloc1(local_size,&all_local_idx_B);
    ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
    if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size);
    ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
  }

  /* local matrix of all local Schur complements on subsets (transposed) */
  if (!sub_schurs->S_Ej_all) {
    MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
    MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
    MatSetType(sub_schurs->S_Ej_all,MATAIJ);
    MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
  }
  /* compute Schur complements explicitly */
  F = NULL;
  if (!sub_schurs->use_mumps) {
    Mat         S_Ej_expl;
    PetscScalar *work;
    PetscInt    j,*dummy_idx;
    PetscBool   Sdense;

    PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
    local_size = 0;
    for (i=0;i<sub_schurs->n_subs;i++) {
      IS  is_subset_B;
      Mat AE_EE,AE_IE,AE_EI,S_Ej;

      /* subsets in original and boundary numbering */
      ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
      /* EE block */
      MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
      /* IE block */
      MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
      /* EI block */
      if (sub_schurs->is_hermitian) {
        MatCreateTranspose(AE_IE,&AE_EI);
      } else {
        MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
      }
      ISDestroy(&is_subset_B);
      MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
      MatDestroy(&AE_EE);
      MatDestroy(&AE_IE);
      MatDestroy(&AE_EI);
      if (AE_II == A_II) { /* we can reuse the same ksp */
        KSP ksp;
        MatSchurComplementGetKSP(sub_schurs->S,&ksp);
        MatSchurComplementSetKSP(S_Ej,ksp);
      } else { /* build a new ksp object which inherits ksp and pc types from the original one */
        KSP       origksp,schurksp;
        PC        origpc,schurpc;
        KSPType   ksp_type;
        PetscInt  n_internal;
        PetscBool ispcnone;

        MatSchurComplementGetKSP(sub_schurs->S,&origksp);
        MatSchurComplementGetKSP(S_Ej,&schurksp);
        KSPGetType(origksp,&ksp_type);
        KSPSetType(schurksp,ksp_type);
        KSPGetPC(schurksp,&schurpc);
        KSPGetPC(origksp,&origpc);
        PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
        if (!ispcnone) {
          PCType pc_type;
          PCGetType(origpc,&pc_type);
          PCSetType(schurpc,pc_type);
        } else {
          PCSetType(schurpc,PCLU);
        }
        ISGetSize(is_I,&n_internal);
        if (n_internal) { /* UMFPACK gives an error with zero-sized problems */
          MatSolverPackage solver = NULL;
          PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
          if (solver) {
            PCFactorSetMatSolverPackage(schurpc,solver);
          }
        }
        KSPSetUp(schurksp);
      }
      ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
      MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
      PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
      PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
      if (Sdense) {
        for (j=0;j<subset_size;j++) {
          dummy_idx[j] = local_size+j;
        }
        MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
      MatDestroy(&S_Ej);
      MatDestroy(&S_Ej_expl);
      local_size += subset_size;
    }
    PetscFree2(dummy_idx,work);
    /* free */
    ISDestroy(&is_I);
    MatDestroy(&AE_II);
    PetscFree(all_local_idx_N);
  } else {
    Mat         A;
    IS          is_A_all;
    PetscScalar *work,*S_data;
    PetscInt    n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
    PetscBool   mumps_S;

    /* get working mat */
    n_I = 0;
    if (is_I_layer) {
      ISGetLocalSize(is_I_layer,&n_I);
    }
    if (!sub_schurs->is_dir) {
      ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
      size_schur = local_size;
    } else {
      IS list[2];

      ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
      list[1] = sub_schurs->is_dir;
      ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
      ISDestroy(&list[0]);
      ISGetLocalSize(sub_schurs->is_dir,&size_schur);
      size_schur += local_size;
    }
    PetscFree(all_local_idx_N);
    size_active_schur = local_size; /* the active Schur size does not count any Dirichlet dof on the interface */
    MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
    MatSetOptionsPrefix(A,"sub_schurs_");
    MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
    MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
    MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
    if (n_I) {
      IS is_schur;

      if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
        MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
      } else {
        MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
      }
      /* subsets ordered last */
      ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
      MatFactorSetSchurIS(F,is_schur);
      ISDestroy(&is_schur);

      /* factorization step */
      if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
        MatCholeskyFactorSymbolic(F,A,NULL,NULL);
#if defined(PETSC_HAVE_MUMPS) /* make sure ICNTL(19) is not overridden from the command line */
        MatMumpsSetIcntl(F,19,2);
#endif
        MatCholeskyFactorNumeric(F,A,NULL);
      } else {
        MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
#if defined(PETSC_HAVE_MUMPS) /* make sure ICNTL(19) is not overridden from the command line */
        MatMumpsSetIcntl(F,19,3);
#endif
        MatLUFactorNumeric(F,A,NULL);
      }
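      /* Editorial note (hedged): as I read the MUMPS documentation,
         ICNTL(19)=2 returns a Schur complement where only the lower
         triangular part is significant in the symmetric case, while
         ICNTL(19)=3 returns the complete matrix; this would explain why the
         copy loops later expand the lower triangle to a full block only in
         the hermitian MUMPS path. */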
      /* get the explicit Schur complement computed during the numeric factorization */
      MatFactorGetSchurComplement(F,&S_all);

      /* we can reuse the solvers if we are not using the economic version */
      ISGetLocalSize(sub_schurs->is_I,&n_I_all);
      reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
      mumps_S       = PETSC_TRUE;
    } else { /* we can't use MUMPS when size_schur == size of the problem */
      MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
      reuse_solvers = PETSC_FALSE;
      mumps_S       = PETSC_FALSE;
    }
    if (reuse_solvers) {
      Mat              A_II;
      Vec              vec1_B;
      PCBDDCReuseMumps msolv_ctx;

      if (sub_schurs->reuse_mumps) {
        PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
      } else {
        PetscNew(&sub_schurs->reuse_mumps);
      }
      msolv_ctx = sub_schurs->reuse_mumps;
      MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
      MatGetSize(A_II,&msolv_ctx->n_I,NULL);
      PetscObjectReference((PetscObject)F);
      msolv_ctx->F = F;
      MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);

      /* interior solver */
      PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
      PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
      PCSetType(msolv_ctx->interior_solver,PCSHELL);
      PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
      PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
      PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);

      /* correction solver */
      PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
      PCSetOperators(msolv_ctx->correction_solver,A,A);
      PCSetType(msolv_ctx->correction_solver,PCSHELL);
      PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
      PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
      PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);

      /* scatter and vecs for Schur complement solver */
      MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
      MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
      ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
      VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
      VecDestroy(&vec1_B);
      PetscObjectReference((PetscObject)is_A_all);
      msolv_ctx->is_R = is_A_all;
    }
    MatDestroy(&A);
    ISDestroy(&is_A_all);
    /* Work arrays */
    PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);

    /* matrices for adaptive selection */
    if (compute_Stilda) {
      if (!sub_schurs->sum_S_Ej_tilda_all) {
        MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
        MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
        MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
        MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
      }
      if (!sub_schurs->sum_S_Ej_inv_all) {
        MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
        MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
        MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
        MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
      }
    }
    /* S_Ej_all */
    cum = cum2 = 0;
    MatDenseGetArray(S_all,&S_data);
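    /* Editorial note on the indexing below: S_all is stored column major and
       the subsets sit contiguously along its diagonal. For a subset of size
       m whose block starts at offset cum, entry (j,k) of the block lives at
       S_data[cum2 + k*size_schur + j] with cum2 = cum*(size_schur+1); after
       the block is processed, cum2 advances by m*(size_schur+1), i.e. m
       columns plus m rows, landing on the first entry of the next diagonal
       block. */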
    for (i=0;i<sub_schurs->n_subs;i++) {
      PetscInt j;

      /* get S_E */
      ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
      if (mumps_S && sub_schurs->is_hermitian) { /* when using MUMPS data we need to expand to upper triangular (column oriented) */
        PetscInt k;
        for (k=0;k<subset_size;k++) {
          for (j=k;j<subset_size;j++) {
            work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
            work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
          }
        }
      } else { /* copy to workspace */
        PetscInt k;
        for (k=0;k<subset_size;k++) {
          for (j=0;j<subset_size;j++) {
            work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
          }
        }
      }
      /* insert S_E values */
      for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
      MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);

      /* if adaptivity is requested, invert the S_E block */
      if (compute_Stilda) {
        PetscBLASIntCast(subset_size,&B_N);
        PetscFPTrapPush(PETSC_FP_TRAP_OFF);
        if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO: add sytrf/i for symmetric non-hermitian */
          PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
          if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
          PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
          if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
        } else {
          PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
          if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
          PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
          if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
        }
        PetscFPTrapPop();
        MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
      }
      cum  += subset_size;
      cum2 += subset_size*(size_schur + 1);
    }
    MatDenseRestoreArray(S_all,&S_data);

    if (mumps_S) {
      MatFactorRestoreSchurComplement(F,&S_all);
    }
    if (compute_Stilda && size_active_schur) {
      if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we have already computed the inverse */
        PetscInt j;
        for (j=0;j<size_schur;j++) dummy_idx[j] = j;
        MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
      } else {
        if (mumps_S) { /* use MatFactor calls to invert S */
          MatFactorInvertSchurComplement(F);
          MatFactorGetSchurComplement(F,&S_all);
        } else { /* we need to invert explicitly since we are not using MUMPS for S */
          MatDenseGetArray(S_all,&S_data);
          PetscBLASIntCast(size_schur,&B_N);
          PetscFPTrapPush(PETSC_FP_TRAP_OFF);
          if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO: add sytrf/i for symmetric non-hermitian */
            PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
            if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
            PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
            if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
          } else {
            PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
            if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
            PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
            if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
          }
          PetscFPTrapPop();
          MatDenseRestoreArray(S_all,&S_data);
        }
        /* S_Ej_tilda_all */
        cum = cum2 = 0;
        MatDenseGetArray(S_all,&S_data);
        for (i=0;i<sub_schurs->n_subs;i++) {
          PetscInt j;

          ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
          /* get (St^-1)_E */
          if (sub_schurs->is_hermitian) { /* here we don't need to expand to upper triangular (column oriented) */
            PetscInt k;
            for (k=0;k<subset_size;k++) {
              for (j=k;j<subset_size;j++) {
                work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
              }
            }
          } else { /* copy to workspace */
            PetscInt k;
            for (k=0;k<subset_size;k++) {
              for (j=0;j<subset_size;j++) {
                work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
              }
            }
          }
          for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
          MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
          cum  += subset_size;
          cum2 += subset_size*(size_schur + 1);
        }
        MatDenseRestoreArray(S_all,&S_data);
        if (mumps_S) {
          MatFactorRestoreSchurComplement(F,&S_all);
        }
      }
    }
    PetscFree2(dummy_idx,work);
  }
  PetscFree(nnz);
  MatDestroy(&F);
  ISDestroy(&is_I_layer);
  MatDestroy(&S_all);
  MatDestroy(&A_BB);
  MatDestroy(&A_IB);
  MatDestroy(&A_BI);
  MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
  if (compute_Stilda) {
    MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
    MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
  }
  /* Global matrix of all assembled Schur complements on subsets */
  MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
  MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
  MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);

  /* Get local part of (\sum_j S_Ej) */
  if (!sub_schurs->sum_S_Ej_all) {
    MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
    sub_schurs->sum_S_Ej_all = submats[0];
  } else {
    PetscMalloc1(1,&submats);
    submats[0] = sub_schurs->sum_S_Ej_all;
    MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
  }
  /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work during setup) */
  if (faster_deluxe) {
    Mat         tmpmat;
    PetscScalar *array;
    PetscInt    cum;

    MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
    cum = 0;
    for (i=0;i<sub_schurs->n_subs;i++) {
      ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
      PetscBLASIntCast(subset_size,&B_N);
      PetscFPTrapPush(PETSC_FP_TRAP_OFF);
      if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
        PetscInt j,k;

        PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
        if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
        PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
        if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
        for (j=0;j<B_N;j++) {
          for (k=j+1;k<B_N;k++) {
            array[k*B_N+j+cum] = array[j*B_N+k+cum];
          }
        }
      } else {
        PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
        if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
        PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
        if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
      }
      PetscFPTrapPop();
      cum += subset_size*subset_size;
    }
    MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
    MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
    MatDestroy(&sub_schurs->S_Ej_all);
    MatDestroy(&sub_schurs->sum_S_Ej_all);
    sub_schurs->S_Ej_all = tmpmat;
  }
  /* Get local part of (\sum_j S^-1_Ej) and (\sum_j St^-1_Ej) */
  if (compute_Stilda) {
    MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
    MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
    submats[0] = sub_schurs->sum_S_Ej_tilda_all;
    MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
    MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
    MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
    submats[0] = sub_schurs->sum_S_Ej_inv_all;
    MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
  }

  /* free workspace */
  PetscFree(submats);
  PetscFree2(Bwork,pivots);
  MatDestroy(&global_schur_subsets);
  MatDestroy(&work_mat);
  ISDestroy(&all_subsets_n);
  PetscCommDestroy(&comm_n);
  return(0);
}
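/* Editorial sketch of the intended lifecycle of this object, pieced together
   from the routines in this file (hedged; the exact arguments depend on the
   PCBDDC internals that call these functions):

     PCBDDCSubSchurs sub_schurs;
     PCBDDCSubSchursCreate(&sub_schurs);
     PCBDDCSubSchursInit(sub_schurs,is_I,is_B,graph,BtoNmap);  // classify faces/edges, stash mappings
     PCBDDCSubSchursSetUp(sub_schurs,A,S,xadj,adjncy,nlayers,
                          faster_deluxe,compute_Stilda,reuse_solvers); // build the S_Ej matrices
     // ...use sub_schurs->S_Ej_all, sum_S_Ej_all, sum_S_Ej_tilda_all, etc...
     PCBDDCSubSchursDestroy(&sub_schurs);
*/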
PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
{
  IS        *faces,*edges,*all_cc,vertices;
  PetscInt  i,n_faces,n_edges,n_all_cc;
  PetscBool is_sorted;

  ISSorted(is_I,&is_sorted);
  if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be sorted");
  ISSorted(is_B,&is_sorted);
  if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be sorted");

  /* reset any previous data */
  PCBDDCSubSchursReset(sub_schurs);

  /* get index sets for faces and edges (already sorted by global ordering) */
  PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
  n_all_cc = n_faces+n_edges;
  PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
  PetscMalloc1(n_all_cc,&all_cc);
  for (i=0;i<n_faces;i++) {
    all_cc[i] = faces[i];
  }
  for (i=0;i<n_edges;i++) {
    all_cc[n_faces+i] = edges[i];
    PetscBTSet(sub_schurs->is_edge,n_faces+i);
  }
  PetscFree(faces);
  PetscFree(edges);
  sub_schurs->is_dir = NULL;
  PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);

  /* determine if MUMPS can be used */
  sub_schurs->use_mumps = PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  sub_schurs->use_mumps = PETSC_TRUE;
#endif

  PetscObjectReference((PetscObject)is_I);
  sub_schurs->is_I = is_I;
  PetscObjectReference((PetscObject)is_B);
  sub_schurs->is_B = is_B;
  PetscObjectReference((PetscObject)graph->l2gmap);
  sub_schurs->l2gmap = graph->l2gmap;
  PetscObjectReference((PetscObject)BtoNmap);
  sub_schurs->BtoNmap = BtoNmap;
  sub_schurs->n_subs  = n_all_cc;
  sub_schurs->is_subs = all_cc;
  if (!sub_schurs->use_mumps) { /* sort by local ordering if MUMPS is not present */
    for (i=0;i<sub_schurs->n_subs;i++) {
      ISSort(sub_schurs->is_subs[i]);
    }
  }
  sub_schurs->is_vertices        = vertices;
  sub_schurs->S_Ej_all           = NULL;
  sub_schurs->sum_S_Ej_all       = NULL;
  sub_schurs->sum_S_Ej_inv_all   = NULL;
  sub_schurs->sum_S_Ej_tilda_all = NULL;
  sub_schurs->is_Ej_all          = NULL;
  return(0);
}
PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
{
  PCBDDCSubSchurs schurs_ctx;

  PetscNew(&schurs_ctx);
  schurs_ctx->n_subs = 0;
  *sub_schurs = schurs_ctx;
  return(0);
}

PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
{
  PCBDDCSubSchursReset(*sub_schurs);
  PetscFree(*sub_schurs);
  return(0);
}
PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
{
  PetscInt i;

  MatDestroy(&sub_schurs->A);
  MatDestroy(&sub_schurs->S);
  ISDestroy(&sub_schurs->is_I);
  ISDestroy(&sub_schurs->is_B);
  ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
  ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
  MatDestroy(&sub_schurs->S_Ej_all);
  MatDestroy(&sub_schurs->sum_S_Ej_all);
  MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
  MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
  ISDestroy(&sub_schurs->is_Ej_all);
  ISDestroy(&sub_schurs->is_vertices);
  ISDestroy(&sub_schurs->is_dir);
  PetscBTDestroy(&sub_schurs->is_edge);
  for (i=0;i<sub_schurs->n_subs;i++) {
    ISDestroy(&sub_schurs->is_subs[i]);
  }
  if (sub_schurs->n_subs) {
    PetscFree(sub_schurs->is_subs);
  }
  if (sub_schurs->reuse_mumps) {
    PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
  }
  PetscFree(sub_schurs->reuse_mumps);
  sub_schurs->n_subs = 0;
  return(0);
}
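/* Editorial note on the routine below: queue_tip points one past the dofs
   added in the previous layer, so the loop over i in [-n_prev,0) revisits
   exactly those dofs, while newly discovered neighbors are appended at
   queue_tip[0], queue_tip[1], ... This is a breadth-first sweep over the
   adjacency graph (xadj/adjncy in CSR form), with the PetscBT bit array
   marking dofs that have already been touched. */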
PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
{
  PetscInt i,j,n;

  n = 0;
  for (i=-n_prev;i<0;i++) {
    PetscInt start_dof = queue_tip[i];
    for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
      PetscInt dof = adjncy[j];
      if (!PetscBTLookup(touched,dof)) {
        PetscBTSet(touched,dof);
        queue_tip[n] = dof;
        n++;
      }
    }
  }
  *n_added = n;
  return(0);
}