Actual source code: bddcschurs.c

petsc-3.7.2 2016-06-05
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  5: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
  6: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
  7: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
  8: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);

 12: static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 13: {
 14:   PCBDDCReuseMumps ctx;
 15: #if defined(PETSC_HAVE_MUMPS)
 16:   PetscInt         ival;
 17: #endif
 18:   PetscErrorCode   ierr;

 21:   PCShellGetContext(pc,(void **)&ctx);
 22: #if defined(PETSC_HAVE_MUMPS)
 23:   MatMumpsGetIcntl(ctx->F,26,&ival);
 24:   MatMumpsSetIcntl(ctx->F,26,-1);
 25: #endif
 26:   if (transpose) {
 27:     MatSolveTranspose(ctx->F,rhs,sol);
 28:   } else {
 29:     MatSolve(ctx->F,rhs,sol);
 30:   }
 31: #if defined(PETSC_HAVE_MUMPS)
 32:   MatMumpsSetIcntl(ctx->F,26,ival);
 33: #endif
 34:   return(0);
 35: }

 39: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
 40: {
 41:   PetscErrorCode   ierr;

 44:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
 45:   return(0);
 46: }

 50: static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
 51: {
 52:   PetscErrorCode   ierr;

 55:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
 56:   return(0);
 57: }

 61: static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
 62: {

 66:   MatDestroy(&reuse->F);
 67:   VecDestroy(&reuse->sol);
 68:   VecDestroy(&reuse->rhs);
 69:   PCDestroy(&reuse->interior_solver);
 70:   PCDestroy(&reuse->correction_solver);
 71:   ISDestroy(&reuse->is_R);
 72:   ISDestroy(&reuse->is_B);
 73:   VecScatterDestroy(&reuse->correction_scatter_B);
 74:   VecDestroy(&reuse->sol_B);
 75:   VecDestroy(&reuse->rhs_B);
 76:   return(0);
 77: }

 81: static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 82: {
 83:   PCBDDCReuseMumps ctx;
 84:   PetscScalar      *array,*array_mumps;
 85: #if defined(PETSC_HAVE_MUMPS)
 86:   PetscInt         ival;
 87: #endif
 88:   PetscErrorCode   ierr;

 91:   PCShellGetContext(pc,(void **)&ctx);
 92: #if defined(PETSC_HAVE_MUMPS)
 93:   MatMumpsGetIcntl(ctx->F,26,&ival);
 94:   MatMumpsSetIcntl(ctx->F,26,0);
 95: #endif
 96:   /* copy rhs into factored matrix workspace (can it be avoided?, MatSolve_MUMPS has another copy b->x internally) */
 97:   VecGetArrayRead(rhs,(const PetscScalar**)&array);
 98:   VecGetArray(ctx->rhs,&array_mumps);
 99:   PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
100:   VecRestoreArray(ctx->rhs,&array_mumps);
101:   VecRestoreArrayRead(rhs,(const PetscScalar**)&array);

103:   if (transpose) {
104:     MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
105:   } else {
106:     MatSolve(ctx->F,ctx->rhs,ctx->sol);
107:   }

109:   /* get back data to caller worskpace */
110:   VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
111:   VecGetArray(sol,&array);
112:   PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
113:   VecRestoreArray(sol,&array);
114:   VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
115: #if defined(PETSC_HAVE_MUMPS)
116:   MatMumpsSetIcntl(ctx->F,26,ival);
117: #endif
118:   return(0);
119: }

123: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
124: {
125:   PetscErrorCode   ierr;

128:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
129:   return(0);
130: }

134: static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
135: {
136:   PetscErrorCode   ierr;

139:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
140:   return(0);
141: }

145: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
146: {
147:   Mat            B, C, D, Bd, Cd, AinvBd;
148:   KSP            ksp;
149:   PC             pc;
150:   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
151:   PetscReal      fill = 2.0;
152:   PetscInt       n_I;
153:   PetscMPIInt    size;

157:   MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
158:   if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
159:   if (reuse == MAT_REUSE_MATRIX) {
160:     PetscBool Sdense;

162:     PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
163:     if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
164:   }
165:   MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
166:   MatSchurComplementGetKSP(M, &ksp);
167:   KSPGetPC(ksp, &pc);
168:   PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
169:   PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
170:   PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
171:   PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
172:   PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
173:   MatGetSize(B,&n_I,NULL);
174:   if (n_I) {
175:     if (!Bdense) {
176:       MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
177:     } else {
178:       Bd = B;
179:     }

181:     if (isLU || isILU || isCHOL) {
182:       Mat fact;
183:       KSPSetUp(ksp);
184:       PCFactorGetMatrix(pc, &fact);
185:       MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
186:       MatMatSolve(fact, Bd, AinvBd);
187:     } else {
188:       PetscBool ex = PETSC_TRUE;

190:       if (ex) {
191:         Mat Ainvd;

193:         PCComputeExplicitOperator(pc, &Ainvd);
194:         MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
195:         MatDestroy(&Ainvd);
196:       } else {
197:         Vec         sol,rhs;
198:         PetscScalar *arrayrhs,*arraysol;
199:         PetscInt    i,nrhs,n;

201:         MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
202:         MatGetSize(Bd,&n,&nrhs);
203:         MatDenseGetArray(Bd,&arrayrhs);
204:         MatDenseGetArray(AinvBd,&arraysol);
205:         KSPGetSolution(ksp,&sol);
206:         KSPGetRhs(ksp,&rhs);
207:         for (i=0;i<nrhs;i++) {
208:           VecPlaceArray(rhs,arrayrhs+i*n);
209:           VecPlaceArray(sol,arraysol+i*n);
210:           KSPSolve(ksp,rhs,sol);
211:           VecResetArray(rhs);
212:           VecResetArray(sol);
213:         }
214:         MatDenseRestoreArray(Bd,&arrayrhs);
215:         MatDenseRestoreArray(AinvBd,&arrayrhs);
216:       }
217:     }
218:     if (!Bdense & !issym) {
219:       MatDestroy(&Bd);
220:     }

222:     if (!issym) {
223:       if (!Cdense) {
224:         MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
225:       } else {
226:         Cd = C;
227:       }
228:       MatMatMult(Cd, AinvBd, reuse, fill, S);
229:       if (!Cdense) {
230:         MatDestroy(&Cd);
231:       }
232:     } else {
233:       MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
234:       if (!Bdense) {
235:         MatDestroy(&Bd);
236:       }
237:     }
238:     MatDestroy(&AinvBd);
239:   }

241:   if (D) {
242:     Mat       Dd;
243:     PetscBool Ddense;

245:     PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
246:     if (!Ddense) {
247:       MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
248:     } else {
249:       Dd = D;
250:     }
251:     if (n_I) {
252:       MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
253:     } else {
254:       if (reuse == MAT_INITIAL_MATRIX) {
255:         MatDuplicate(Dd,MAT_COPY_VALUES,S);
256:       } else {
257:         MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
258:       }
259:     }
260:     if (!Ddense) {
261:       MatDestroy(&Dd);
262:     }
263:   } else {
264:     MatScale(*S,-1.0);
265:   }
266:   return(0);
267: }

271: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
272: {
273:   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
274:   Mat                    S_all;
275:   Mat                    global_schur_subsets,work_mat,*submats;
276:   ISLocalToGlobalMapping l2gmap_subsets;
277:   IS                     is_I,is_I_layer;
278:   IS                     all_subsets,all_subsets_mult,all_subsets_n;
279:   PetscInt               *nnz,*all_local_idx_N;
280:   PetscInt               *auxnum1,*auxnum2;
281:   PetscInt               i,subset_size,max_subset_size;
282:   PetscInt               extra,local_size,global_size;
283:   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
284:   PetscScalar            *Bwork;
285:   PetscSubcomm           subcomm;
286:   PetscMPIInt            color,rank;
287:   MPI_Comm               comm_n;
288:   PetscErrorCode         ierr;

291:   /* update info in sub_schurs */
292:   MatDestroy(&sub_schurs->A);
293:   MatDestroy(&sub_schurs->S);
294:   sub_schurs->is_hermitian = PETSC_FALSE;
295:   sub_schurs->is_posdef = PETSC_FALSE;
296:   if (Ain) {
297:     PetscBool isseqaij;
298:     /* determine if we are dealing with hermitian positive definite problems */
299: #if !defined(PETSC_USE_COMPLEX)
300:     if (Ain->symmetric_set) {
301:       sub_schurs->is_hermitian = Ain->symmetric;
302:     }
303: #else
304:     if (Ain->hermitian_set) {
305:       sub_schurs->is_hermitian = Ain->hermitian;
306:     }
307: #endif
308:     if (Ain->spd_set) {
309:       sub_schurs->is_posdef = Ain->spd;
310:     }

312:     /* check */
313:     PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
314:     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
315:       PetscInt lsize;

317:       MatGetSize(Ain,&lsize,NULL);
318:       if (lsize) {
319:         PetscScalar val;
320:         PetscReal   norm;
321:         Vec         vec1,vec2,vec3;

323:         MatCreateVecs(Ain,&vec1,&vec2);
324:         VecDuplicate(vec1,&vec3);
325:         VecSetRandom(vec1,NULL);
326:         MatMult(Ain,vec1,vec2);
327: #if !defined(PETSC_USE_COMPLEX)
328:         MatMultTranspose(Ain,vec1,vec3);
329: #else
330:         MatMultHermitianTranspose(Ain,vec1,vec3);
331: #endif
332:         VecAXPY(vec3,-1.0,vec2);
333:         VecNorm(vec3,NORM_INFINITY,&norm);
334:         if (norm > PetscSqrtReal(PETSC_SMALL)) {
335:           sub_schurs->is_hermitian = PETSC_FALSE;
336:         } else {
337:           sub_schurs->is_hermitian = PETSC_TRUE;
338:         }
339:         VecDot(vec1,vec2,&val);
340:         if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
341:         VecDestroy(&vec1);
342:         VecDestroy(&vec2);
343:         VecDestroy(&vec3);
344:       } else {
345:         sub_schurs->is_hermitian = PETSC_TRUE;
346:         sub_schurs->is_posdef = PETSC_TRUE;
347:       }
348:     }
349:     if (isseqaij) {
350:       PetscObjectReference((PetscObject)Ain);
351:       sub_schurs->A = Ain;
352:     } else {
353:       MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
354:     }
355:   }
356:   if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%D,%D)",sub_schurs->is_hermitian,sub_schurs->is_posdef);

358:   PetscObjectReference((PetscObject)Sin);
359:   sub_schurs->S = Sin;
360:   if (sub_schurs->use_mumps) {
361:     sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
362:   }

364:   /* preliminary checks */
365:   if (!sub_schurs->use_mumps && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");

367:   /* restrict work on active processes */
368:   color = 0;
369:   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
370:   MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
371:   PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
372:   PetscSubcommSetNumber(subcomm,2);
373:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
374:   PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
375:   PetscSubcommDestroy(&subcomm);
376:   if (!sub_schurs->n_subs) {
377:     PetscCommDestroy(&comm_n);
378:     return(0);
379:   }
380:   /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */

382:   /* get Schur complement matrices */
383:   if (!sub_schurs->use_mumps) {
384:     Mat       tA_IB,tA_BI,tA_BB;
385:     PetscBool isseqsbaij;
386:     MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
387:     PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
388:     if (isseqsbaij) {
389:       MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
390:       MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
391:       MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
392:     } else {
393:       PetscObjectReference((PetscObject)tA_BB);
394:       A_BB = tA_BB;
395:       PetscObjectReference((PetscObject)tA_IB);
396:       A_IB = tA_IB;
397:       PetscObjectReference((PetscObject)tA_BI);
398:       A_BI = tA_BI;
399:     }
400:   } else {
401:     A_II = NULL;
402:     A_IB = NULL;
403:     A_BI = NULL;
404:     A_BB = NULL;
405:   }
406:   S_all = NULL;

408:   /* determine interior problems */
409:   ISGetLocalSize(sub_schurs->is_I,&i);
410:   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
411:     PetscBT                touched;
412:     const PetscInt*        idx_B;
413:     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;

415:     if (!xadj || !adjncy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
416:     /* get sizes */
417:     ISGetLocalSize(sub_schurs->is_I,&n_I);
418:     ISGetLocalSize(sub_schurs->is_B,&n_B);

420:     PetscMalloc1(n_I+n_B,&local_numbering);
421:     PetscBTCreate(n_I+n_B,&touched);
422:     PetscBTMemzero(n_I+n_B,touched);

424:     /* all boundary dofs must be skipped when adding layers */
425:     ISGetIndices(sub_schurs->is_B,&idx_B);
426:     for (j=0;j<n_B;j++) {
427:       PetscBTSet(touched,idx_B[j]);
428:     }
429:     PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
430:     ISRestoreIndices(sub_schurs->is_B,&idx_B);

432:     /* add prescribed number of layers of dofs */
433:     n_local_dofs = n_B;
434:     n_prev_added = n_B;
435:     for (layer=0;layer<nlayers;layer++) {
436:       PetscInt n_added;
437:       if (n_local_dofs == n_I+n_B) break;
438:       if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
439:       PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
440:       n_prev_added = n_added;
441:       n_local_dofs += n_added;
442:       if (!n_added) break;
443:     }
444:     PetscBTDestroy(&touched);

446:     /* IS for I layer dofs in original numbering */
447:     ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
448:     PetscFree(local_numbering);
449:     ISSort(is_I_layer);
450:     /* IS for I layer dofs in I numbering */
451:     if (!sub_schurs->use_mumps) {
452:       ISLocalToGlobalMapping ItoNmap;
453:       ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
454:       ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
455:       ISLocalToGlobalMappingDestroy(&ItoNmap);

457:       /* II block */
458:       MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
459:     }
460:   } else {
461:     PetscInt n_I;

463:     /* IS for I dofs in original numbering */
464:     PetscObjectReference((PetscObject)sub_schurs->is_I);
465:     is_I_layer = sub_schurs->is_I;

467:     /* IS for I dofs in I numbering (strided 1) */
468:     if (!sub_schurs->use_mumps) {
469:       ISGetSize(sub_schurs->is_I,&n_I);
470:       ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);

472:       /* II block is the same */
473:       PetscObjectReference((PetscObject)A_II);
474:       AE_II = A_II;
475:     }
476:   }

478:   /* Get info on subset sizes and sum of all subsets sizes */
479:   max_subset_size = 0;
480:   local_size = 0;
481:   for (i=0;i<sub_schurs->n_subs;i++) {
482:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
483:     max_subset_size = PetscMax(subset_size,max_subset_size);
484:     local_size += subset_size;
485:   }

487:   /* Work arrays for local indices */
488:   extra = 0;
489:   if (sub_schurs->use_mumps && is_I_layer) {
490:     ISGetLocalSize(is_I_layer,&extra);
491:   }
492:   PetscMalloc1(local_size+extra,&all_local_idx_N);
493:   if (extra) {
494:     const PetscInt *idxs;
495:     ISGetIndices(is_I_layer,&idxs);
496:     PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
497:     ISRestoreIndices(is_I_layer,&idxs);
498:   }
499:   PetscMalloc1(local_size,&nnz);
500:   PetscMalloc1(sub_schurs->n_subs,&auxnum1);
501:   PetscMalloc1(sub_schurs->n_subs,&auxnum2);

503:   /* Get local indices in local numbering */
504:   local_size = 0;
505:   for (i=0;i<sub_schurs->n_subs;i++) {
506:     PetscInt j;
507:     const    PetscInt *idxs;

509:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
510:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
511:     /* start (smallest in global ordering) and multiplicity */
512:     auxnum1[i] = idxs[0];
513:     auxnum2[i] = subset_size;
514:     /* subset indices in local numbering */
515:     PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
516:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
517:     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
518:     local_size += subset_size;
519:   }

521:   /* allocate extra workspace needed only for GETRI */
522:   Bwork = NULL;
523:   pivots = NULL;
524:   if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
525:     PetscScalar lwork;

527:     B_lwork = -1;
528:     PetscBLASIntCast(local_size,&B_N);
529:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
530:     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
531:     PetscFPTrapPop();
532:     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
533:     PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
534:     PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
535:   }

537:   /* prepare parallel matrices for summing up properly schurs on subsets */
538:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
539:   ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
540:   ISDestroy(&all_subsets_n);
541:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
542:   PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
543:   ISDestroy(&all_subsets);
544:   ISDestroy(&all_subsets_mult);
545:   ISGetLocalSize(all_subsets_n,&i);
546:   if (i != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_size);
547:   ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
548:   MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);
549:   ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
550:   MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
551:   MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
552:   MatSetType(global_schur_subsets,MATMPIAIJ);

554:   /* subset indices in local boundary numbering */
555:   if (!sub_schurs->is_Ej_all) {
556:     PetscInt *all_local_idx_B;

558:     PetscMalloc1(local_size,&all_local_idx_B);
559:     ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
560:     if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D\n",subset_size,local_size);
561:     ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
562:   }

564:   /* Local matrix of all local Schur on subsets (transposed) */
565:   if (!sub_schurs->S_Ej_all) {
566:     MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
567:     MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
568:     MatSetType(sub_schurs->S_Ej_all,MATAIJ);
569:     MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
570:   }

572:   /* Compute Schur complements explicitly */
573:   F = NULL;
574:   if (!sub_schurs->use_mumps) {
575:     Mat         S_Ej_expl;
576:     PetscScalar *work;
577:     PetscInt    j,*dummy_idx;
578:     PetscBool   Sdense;

580:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
581:     local_size = 0;
582:     for (i=0;i<sub_schurs->n_subs;i++) {
583:       IS  is_subset_B;
584:       Mat AE_EE,AE_IE,AE_EI,S_Ej;

586:       /* subsets in original and boundary numbering */
587:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
588:       /* EE block */
589:       MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
590:       /* IE block */
591:       MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
592:       /* EI block */
593:       if (sub_schurs->is_hermitian) {
594:         MatCreateTranspose(AE_IE,&AE_EI);
595:       } else {
596:         MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
597:       }
598:       ISDestroy(&is_subset_B);
599:       MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
600:       MatDestroy(&AE_EE);
601:       MatDestroy(&AE_IE);
602:       MatDestroy(&AE_EI);
603:       if (AE_II == A_II) { /* we can reuse the same ksp */
604:         KSP ksp;
605:         MatSchurComplementGetKSP(sub_schurs->S,&ksp);
606:         MatSchurComplementSetKSP(S_Ej,ksp);
607:       } else { /* build new ksp object which inherits ksp and pc types from the original one */
608:         KSP       origksp,schurksp;
609:         PC        origpc,schurpc;
610:         KSPType   ksp_type;
611:         PetscInt  n_internal;
612:         PetscBool ispcnone;

614:         MatSchurComplementGetKSP(sub_schurs->S,&origksp);
615:         MatSchurComplementGetKSP(S_Ej,&schurksp);
616:         KSPGetType(origksp,&ksp_type);
617:         KSPSetType(schurksp,ksp_type);
618:         KSPGetPC(schurksp,&schurpc);
619:         KSPGetPC(origksp,&origpc);
620:         PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
621:         if (!ispcnone) {
622:           PCType pc_type;
623:           PCGetType(origpc,&pc_type);
624:           PCSetType(schurpc,pc_type);
625:         } else {
626:           PCSetType(schurpc,PCLU);
627:         }
628:         ISGetSize(is_I,&n_internal);
629:         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
630:           MatSolverPackage solver=NULL;
631:           PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
632:           if (solver) {
633:             PCFactorSetMatSolverPackage(schurpc,solver);
634:           }
635:         }
636:         KSPSetUp(schurksp);
637:       }
638:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
639:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
640:       PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
641:       PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
642:       if (Sdense) {
643:         for (j=0;j<subset_size;j++) {
644:           dummy_idx[j]=local_size+j;
645:         }
646:         MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
647:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
648:       MatDestroy(&S_Ej);
649:       MatDestroy(&S_Ej_expl);
650:       local_size += subset_size;
651:     }
652:     PetscFree2(dummy_idx,work);
653:     /* free */
654:     ISDestroy(&is_I);
655:     MatDestroy(&AE_II);
656:     PetscFree(all_local_idx_N);
657:   } else {
658:     Mat         A;
659:     IS          is_A_all;
660:     PetscScalar *work,*S_data;
661:     PetscInt    n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
662:     PetscBool   mumps_S;

664:     /* get working mat */
665:     n_I = 0;
666:     if (is_I_layer) {
667:       ISGetLocalSize(is_I_layer,&n_I);
668:     }
669:     if (!sub_schurs->is_dir) {
670:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
671:       size_schur = local_size;
672:     } else {
673:       IS list[2];

675:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
676:       list[1] = sub_schurs->is_dir;
677:       ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
678:       ISDestroy(&list[0]);
679:       ISGetLocalSize(sub_schurs->is_dir,&size_schur);
680:       size_schur += local_size;
681:     }
682:     PetscFree(all_local_idx_N);
683:     size_active_schur = local_size; /* size active schurs does not count any dirichlet dof on the interface */
684:     MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
685:     MatSetOptionsPrefix(A,"sub_schurs_");
686:     MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
687:     MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
688:     MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);

690:     if (n_I) {
691:       IS is_schur;

693:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
694:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
695:       } else {
696:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
697:       }
698:       /* subsets ordered last */
699:       ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
700:       MatFactorSetSchurIS(F,is_schur);
701:       ISDestroy(&is_schur);

703:       /* factorization step */
704:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
705:         MatCholeskyFactorSymbolic(F,A,NULL,NULL);
706: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
707:         MatMumpsSetIcntl(F,19,2);
708: #endif
709:         MatCholeskyFactorNumeric(F,A,NULL);
710:       } else {
711:         MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
712: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
713:         MatMumpsSetIcntl(F,19,3);
714: #endif
715:         MatLUFactorNumeric(F,A,NULL);
716:       }

718:       /* get explicit Schur Complement computed during numeric factorization */
719:       MatFactorGetSchurComplement(F,&S_all);

721:       /* we can reuse the solvers if we are not using the economic version */
722:       ISGetLocalSize(sub_schurs->is_I,&n_I_all);
723:       reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
724:       mumps_S = PETSC_TRUE;
725:     } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
726:       MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
727:       reuse_solvers = PETSC_FALSE;
728:       mumps_S = PETSC_FALSE;
729:     }

731:     if (reuse_solvers) {
732:       Mat              A_II;
733:       Vec              vec1_B;
734:       PCBDDCReuseMumps msolv_ctx;

736:       if (sub_schurs->reuse_mumps) {
737:         PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
738:       } else {
739:         PetscNew(&sub_schurs->reuse_mumps);
740:       }
741:       msolv_ctx = sub_schurs->reuse_mumps;
742:       MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
743:       MatGetSize(A_II,&msolv_ctx->n_I,NULL);
744:       PetscObjectReference((PetscObject)F);
745:       msolv_ctx->F = F;
746:       MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);

748:       /* interior solver */
749:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
750:       PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
751:       PCSetType(msolv_ctx->interior_solver,PCSHELL);
752:       PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
753:       PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
754:       PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);

756:       /* correction solver */
757:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
758:       PCSetOperators(msolv_ctx->correction_solver,A,A);
759:       PCSetType(msolv_ctx->correction_solver,PCSHELL);
760:       PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
761:       PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
762:       PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);

764:       /* scatter and vecs for Schur complement solver */
765:       MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
766:       MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
767:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
768:       VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
769:       VecDestroy(&vec1_B);
770:       PetscObjectReference((PetscObject)is_A_all);
771:       msolv_ctx->is_R = is_A_all;
772:     }
773:     MatDestroy(&A);
774:     ISDestroy(&is_A_all);

776:     /* Work arrays */
777:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);

779:     /* matrices for adaptive selection */
780:     if (compute_Stilda) {
781:       if (!sub_schurs->sum_S_Ej_tilda_all) {
782:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
783:         MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
784:         MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
785:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
786:       }
787:       if (!sub_schurs->sum_S_Ej_inv_all) {
788:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
789:         MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
790:         MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
791:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
792:       }
793:     }

795:     /* S_Ej_all */
796:     cum = cum2 = 0;
797:     MatDenseGetArray(S_all,&S_data);
798:     for (i=0;i<sub_schurs->n_subs;i++) {
799:       PetscInt j;

801:       /* get S_E */
802:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
803:       if (mumps_S && sub_schurs->is_hermitian) { /* When using MUMPS data I need to expand to upper triangular (column oriented) */
804:         PetscInt k;
805:         for (k=0;k<subset_size;k++) {
806:           for (j=k;j<subset_size;j++) {
807:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
808:             work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
809:           }
810:         }
811:       } else { /* copy to workspace */
812:         PetscInt k;
813:         for (k=0;k<subset_size;k++) {
814:           for (j=0;j<subset_size;j++) {
815:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
816:           }
817:         }
818:       }
819:       /* insert S_E values */
820:       for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
821:       MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);

823:       /* if adaptivity is requested, invert S_E block */
824:       if (compute_Stilda) {
825:         PetscBLASIntCast(subset_size,&B_N);
826:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
827:         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
828:           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
829:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
830:           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
831:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
832:         } else {
833:           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
834:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
835:           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
836:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
837:         }
838:         PetscFPTrapPop();
839:         MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
840:       }
841:       cum += subset_size;
842:       cum2 += subset_size*(size_schur + 1);
843:     }
844:     MatDenseRestoreArray(S_all,&S_data);

846:     if (mumps_S) {
847:       MatFactorRestoreSchurComplement(F,&S_all);
848:     }

850:     if (compute_Stilda && size_active_schur) {
851:       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
852:         PetscInt j;
853:         for (j=0;j<size_schur;j++) dummy_idx[j] = j;
854:         MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
855:       } else {
856:         if (mumps_S) { /* use MatFactor calls to invert S */
857:           MatFactorInvertSchurComplement(F);
858:           MatFactorGetSchurComplement(F,&S_all);
859:         } else { /* we need to invert explicitly since we are not using MUMPS for S */
860:           MatDenseGetArray(S_all,&S_data);
861:           PetscBLASIntCast(size_schur,&B_N);
862:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
863:           if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
864:             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
865:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
866:             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
867:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
868:           } else {
869:             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
870:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
871:             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
872:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
873:           }
874:           PetscFPTrapPop();
875:           MatDenseRestoreArray(S_all,&S_data);
876:         }
877:         /* S_Ej_tilda_all */
878:         cum = cum2 = 0;
879:         MatDenseGetArray(S_all,&S_data);
880:         for (i=0;i<sub_schurs->n_subs;i++) {
881:           PetscInt j;

883:           ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
884:           /* get (St^-1)_E */
885:           if (sub_schurs->is_hermitian) { /* Here I don't need to expand to upper triangular (column oriented) */
886:             PetscInt k;
887:             for (k=0;k<subset_size;k++) {
888:               for (j=k;j<subset_size;j++) {
889:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
890:               }
891:             }
892:           } else { /* copy to workspace */
893:             PetscInt k;
894:             for (k=0;k<subset_size;k++) {
895:               for (j=0;j<subset_size;j++) {
896:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
897:               }
898:             }
899:           }
900:           for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
901:           MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
902:           cum += subset_size;
903:           cum2 += subset_size*(size_schur + 1);
904:         }
905:         MatDenseRestoreArray(S_all,&S_data);
906:         if (mumps_S) {
907:           MatFactorRestoreSchurComplement(F,&S_all);
908:         }
909:       }
910:     }
911:     PetscFree2(dummy_idx,work);
912:   }
913:   PetscFree(nnz);
914:   MatDestroy(&F);
915:   ISDestroy(&is_I_layer);
916:   MatDestroy(&S_all);
917:   MatDestroy(&A_BB);
918:   MatDestroy(&A_IB);
919:   MatDestroy(&A_BI);
920:   MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
921:   MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
922:   if (compute_Stilda) {
923:     MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
924:     MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
925:     MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
926:     MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
927:   }

929:   /* Global matrix of all assembled Schur on subsets */
930:   MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
931:   MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
932:   MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);

934:   /* Get local part of (\sum_j S_Ej) */
935:   if (!sub_schurs->sum_S_Ej_all) {
936:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
937:     sub_schurs->sum_S_Ej_all = submats[0];
938:   } else {
939:     PetscMalloc1(1,&submats);
940:     submats[0] = sub_schurs->sum_S_Ej_all;
941:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
942:   }

944:   /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
945:   if (faster_deluxe) {
946:     Mat         tmpmat;
947:     PetscScalar *array;
948:     PetscInt    cum;

950:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
951:     cum = 0;
952:     for (i=0;i<sub_schurs->n_subs;i++) {
953:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
954:       PetscBLASIntCast(subset_size,&B_N);
955:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
956:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
957:         PetscInt j,k;

959:         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
960:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
961:         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
962:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
963:         for (j=0;j<B_N;j++) {
964:           for (k=j+1;k<B_N;k++) {
965:             array[k*B_N+j+cum] = array[j*B_N+k+cum];
966:           }
967:         }
968:       } else {
969:         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
970:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
971:         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
972:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
973:       }
974:       PetscFPTrapPop();
975:       cum += subset_size*subset_size;
976:     }
977:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
978:     MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
979:     MatDestroy(&sub_schurs->S_Ej_all);
980:     MatDestroy(&sub_schurs->sum_S_Ej_all);
981:     sub_schurs->S_Ej_all = tmpmat;
982:   }

984:   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
985:   if (compute_Stilda) {
986:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
987:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
988:     submats[0] = sub_schurs->sum_S_Ej_tilda_all;
989:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
990:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
991:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
992:     submats[0] = sub_schurs->sum_S_Ej_inv_all;
993:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
994:   }

996:   /* free workspace */
997:   PetscFree(submats);
998:   PetscFree2(Bwork,pivots);
999:   MatDestroy(&global_schur_subsets);
1000:   MatDestroy(&work_mat);
1001:   ISDestroy(&all_subsets_n);
1002:   PetscCommDestroy(&comm_n);
1003:   return(0);
1004: }

1008: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1009: {
1010:   IS              *faces,*edges,*all_cc,vertices;
1011:   PetscInt        i,n_faces,n_edges,n_all_cc;
1012:   PetscBool       is_sorted;
1013:   PetscErrorCode  ierr;

1016:   ISSorted(is_I,&is_sorted);
1017:   if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1018:   ISSorted(is_B,&is_sorted);
1019:   if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");

1021:   /* reset any previous data */
1022:   PCBDDCSubSchursReset(sub_schurs);

1024:   /* get index sets for faces and edges (already sorted by global ordering) */
1025:   PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1026:   n_all_cc = n_faces+n_edges;
1027:   PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1028:   PetscMalloc1(n_all_cc,&all_cc);
1029:   for (i=0;i<n_faces;i++) {
1030:     all_cc[i] = faces[i];
1031:   }
1032:   for (i=0;i<n_edges;i++) {
1033:     all_cc[n_faces+i] = edges[i];
1034:     PetscBTSet(sub_schurs->is_edge,n_faces+i);
1035:   }
1036:   PetscFree(faces);
1037:   PetscFree(edges);
1038:   sub_schurs->is_dir = NULL;
1039:   PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);

1041:   /* Determine if MUMPS can be used */
1042:   sub_schurs->use_mumps = PETSC_FALSE;
1043: #if defined(PETSC_HAVE_MUMPS)
1044:   sub_schurs->use_mumps = PETSC_TRUE;
1045: #endif

1047:   PetscObjectReference((PetscObject)is_I);
1048:   sub_schurs->is_I = is_I;
1049:   PetscObjectReference((PetscObject)is_B);
1050:   sub_schurs->is_B = is_B;
1051:   PetscObjectReference((PetscObject)graph->l2gmap);
1052:   sub_schurs->l2gmap = graph->l2gmap;
1053:   PetscObjectReference((PetscObject)BtoNmap);
1054:   sub_schurs->BtoNmap = BtoNmap;
1055:   sub_schurs->n_subs = n_all_cc;
1056:   sub_schurs->is_subs = all_cc;
1057:   if (!sub_schurs->use_mumps) { /* sort by local ordering mumps is not present */
1058:     for (i=0;i<sub_schurs->n_subs;i++) {
1059:       ISSort(sub_schurs->is_subs[i]);
1060:     }
1061:   }
1062:   sub_schurs->is_vertices = vertices;
1063:   sub_schurs->S_Ej_all = NULL;
1064:   sub_schurs->sum_S_Ej_all = NULL;
1065:   sub_schurs->sum_S_Ej_inv_all = NULL;
1066:   sub_schurs->sum_S_Ej_tilda_all = NULL;
1067:   sub_schurs->is_Ej_all = NULL;
1068:   return(0);
1069: }

1073: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1074: {
1075:   PCBDDCSubSchurs schurs_ctx;
1076:   PetscErrorCode  ierr;

1079:   PetscNew(&schurs_ctx);
1080:   schurs_ctx->n_subs = 0;
1081:   *sub_schurs = schurs_ctx;
1082:   return(0);
1083: }

1087: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1088: {

1092:   PCBDDCSubSchursReset(*sub_schurs);
1093:   PetscFree(*sub_schurs);
1094:   return(0);
1095: }

1099: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1100: {
1101:   PetscInt       i;

1105:   MatDestroy(&sub_schurs->A);
1106:   MatDestroy(&sub_schurs->S);
1107:   ISDestroy(&sub_schurs->is_I);
1108:   ISDestroy(&sub_schurs->is_B);
1109:   ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1110:   ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1111:   MatDestroy(&sub_schurs->S_Ej_all);
1112:   MatDestroy(&sub_schurs->sum_S_Ej_all);
1113:   MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1114:   MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1115:   ISDestroy(&sub_schurs->is_Ej_all);
1116:   ISDestroy(&sub_schurs->is_vertices);
1117:   ISDestroy(&sub_schurs->is_dir);
1118:   PetscBTDestroy(&sub_schurs->is_edge);
1119:   for (i=0;i<sub_schurs->n_subs;i++) {
1120:     ISDestroy(&sub_schurs->is_subs[i]);
1121:   }
1122:   if (sub_schurs->n_subs) {
1123:     PetscFree(sub_schurs->is_subs);
1124:   }
1125:   if (sub_schurs->reuse_mumps) {
1126:     PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
1127:   }
1128:   PetscFree(sub_schurs->reuse_mumps);
1129:   sub_schurs->n_subs = 0;
1130:   return(0);
1131: }

1135: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1136: {
1137:   PetscInt       i,j,n;

1141:   n = 0;
1142:   for (i=-n_prev;i<0;i++) {
1143:     PetscInt start_dof = queue_tip[i];
1144:     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1145:       PetscInt dof = adjncy[j];
1146:       if (!PetscBTLookup(touched,dof)) {
1147:         PetscBTSet(touched,dof);
1148:         queue_tip[n] = dof;
1149:         n++;
1150:       }
1151:     }
1152:   }
1153:   *n_added = n;
1154:   return(0);
1155: }