Actual source code: mpiov.c

petsc-3.4.2 2013-07-02
  2: /*
  3:    Routines to compute overlapping regions of a parallel MPI matrix
  4:   and to find submatrices that are shared across processors.
  5: */
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <petscbt.h>

  9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*);
 10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**);
 11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*);
 12: extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
 13: extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);

 17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
 18: {
 20:   PetscInt       i;

 23:   if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 24:   for (i=0; i<ov; ++i) {
 25:     MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
 26:   }
 27:   return(0);
 28: }
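/*
   Illustrative usage (a minimal sketch, not taken from this file): the routine above is the
   MPIAIJ implementation behind the public MatIncreaseOverlap() interface, used for example
   when building overlapping subdomains for an additive Schwarz preconditioner:

       IS is[1];                        index set holding this process's rows of interest
       ... create is[0] ...
       MatIncreaseOverlap(C,1,is,2);    grows is[0] by 2 levels of matrix connectivity

   Each call to MatIncreaseOverlap_MPIAIJ_Once() above adds one level of overlap.
*/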

 30: /*
 31:   Sample message format:
 32:   If a processor A wants processor B to process some elements corresponding
 33:   to index sets is[1],is[5]
 34:   mesg [0] = 2   (no of index sets in the mesg)
 35:   -----------
 36:   mesg [1] = 1 => is[1]
 37:   mesg [2] = size of is[1] (no of indices);
 38:   -----------
 39:   mesg [3] = 5  => is[5]
 40:   mesg [4] = size of is[5] (no of indices);
 41:   -----------
 42:   mesg [5] ... mesg [n]   = data of is[1]
 44:   -----------
 45:   mesg [n+1] ... mesg [m] = data of is[5]
 47:   -----------

 49:   Notes:
 50:   nrqs - no of requests sent (or to be sent out)
 51:   nrqr - no of requests received (which have to be, or have been, processed)
 52: */
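/*
   Worked example of the format above (illustrative only): if processor A asks processor B
   to handle rows {3,7} of is[1] and row {12} of is[5], the message sent to B is

       mesg = [ 2,  1, 2,  5, 1,  3, 7,  12 ]

   i.e. 2 index sets; header pairs (1,2) and (5,1) giving the IS number and its length;
   then the row data of is[1] followed by the row data of is[5].
*/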
 55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
 56: {
 57:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
 58:   PetscMPIInt    *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
 59:   const PetscInt **idx,*idx_i;
 60:   PetscInt       *n,**data,len;
 62:   PetscMPIInt    size,rank,tag1,tag2;
 63:   PetscInt       M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
 64:   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
 65:   PetscBT        *table;
 66:   MPI_Comm       comm;
 67:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
 68:   MPI_Status     *s_status,*recv_status;
 69:   char           *t_p;

 72:   PetscObjectGetComm((PetscObject)C,&comm);
 73:   size = c->size;
 74:   rank = c->rank;
 75:   M    = C->rmap->N;

 77:   PetscObjectGetNewTag((PetscObject)C,&tag1);
 78:   PetscObjectGetNewTag((PetscObject)C,&tag2);

 80:   PetscMalloc2(imax,PetscInt*,&idx,imax,PetscInt,&n);

 82:   for (i=0; i<imax; i++) {
 83:     ISGetIndices(is[i],&idx[i]);
 84:     ISGetLocalSize(is[i],&n[i]);
 85:   }

 87:   /* evaluate communication - mesg to who, length of mesg, and buffer space
 88:      required. Based on this, buffers are allocated, and data copied into them*/
 89:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);
 90:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 91:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 92:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 93:   for (i=0; i<imax; i++) {
 94:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 95:     idx_i = idx[i];
 96:     len   = n[i];
 97:     for (j=0; j<len; j++) {
 98:       row = idx_i[j];
 99:       if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100:       PetscLayoutFindOwner(C->rmap,row,&proc);
101:       w4[proc]++;
102:     }
103:     for (j=0; j<size; j++) {
104:       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105:     }
106:   }

108:   nrqs     = 0;              /* no of outgoing messages */
109:   msz      = 0;              /* total mesg length (for all procs) */
110:   w1[rank] = 0;              /* no mesg sent to self */
111:   w3[rank] = 0;
112:   for (i=0; i<size; i++) {
113:     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114:   }
115:   /* pa - is list of processors to communicate with */
116:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);
117:   for (i=0,j=0; i<size; i++) {
118:     if (w1[i]) {pa[j] = i; j++;}
119:   }

121:   /* Each message would have a header = 1 + 2*(no of IS) + data */
122:   for (i=0; i<nrqs; i++) {
123:     j      = pa[i];
124:     w1[j] += w2[j] + 2*w3[j];
125:     msz   += w1[j];
126:   }

128:   /* Determine the number of messages to expect, their lengths, and their from-ids */
129:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

132:   /* Now post the Irecvs corresponding to these messages */
133:   PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);

135:   /* Allocate Memory for outgoing messages */
136:   PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);
137:   PetscMemzero(outdat,size*sizeof(PetscInt*));
138:   PetscMemzero(ptr,size*sizeof(PetscInt*));

140:   {
141:     PetscInt *iptr = tmp,ict  = 0;
142:     for (i=0; i<nrqs; i++) {
143:       j         = pa[i];
144:       iptr     +=  ict;
145:       outdat[j] = iptr;
146:       ict       = w1[j];
147:     }
148:   }

150:   /* Form the outgoing messages */
151:   /*plug in the headers*/
152:   for (i=0; i<nrqs; i++) {
153:     j            = pa[i];
154:     outdat[j][0] = 0;
155:     PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156:     ptr[j]       = outdat[j] + 2*w3[j] + 1;
157:   }

159:   /* Memory for doing local proc's work*/
160:   {
161:     PetscMalloc5(imax,PetscBT,&table, imax,PetscInt*,&data, imax,PetscInt,&isz,
162:                         M*imax,PetscInt,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,char,&t_p);
163:     PetscMemzero(table,imax*sizeof(PetscBT));
164:     PetscMemzero(data,imax*sizeof(PetscInt*));
165:     PetscMemzero(isz,imax*sizeof(PetscInt));
166:     PetscMemzero(d_p,M*imax*sizeof(PetscInt));
167:     PetscMemzero(t_p,(M/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));

169:     for (i=0; i<imax; i++) {
170:       table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
171:       data[i]  = d_p + M*i;
172:     }
173:   }
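    /* table[i] is a bit array over the M global rows used to filter out duplicates for IS i;
       data[i] accumulates the global row indices belonging to IS i, and isz[i] is its current length. */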

175:   /* Parse the IS and update local tables and the outgoing buf with the data*/
176:   {
177:     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
178:     PetscBT  table_i;

180:     for (i=0; i<imax; i++) {
181:       PetscMemzero(ctr,size*sizeof(PetscInt));
182:       n_i     = n[i];
183:       table_i = table[i];
184:       idx_i   = idx[i];
185:       data_i  = data[i];
186:       isz_i   = isz[i];
187:       for (j=0; j<n_i; j++) {   /* parse the indices of each IS */
188:         row  = idx_i[j];
189:         PetscLayoutFindOwner(C->rmap,row,&proc);
190:         if (proc != rank) { /* copy to the outgoing buffer */
191:           ctr[proc]++;
192:           *ptr[proc] = row;
193:           ptr[proc]++;
194:         } else if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; /* Update the local table */
195:       }
196:       /* Update the headers for the current IS */
197:       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
198:         if ((ctr_j = ctr[j])) {
199:           outdat_j        = outdat[j];
200:           k               = ++outdat_j[0];
201:           outdat_j[2*k]   = ctr_j;
202:           outdat_j[2*k-1] = i;
203:         }
204:       }
205:       isz[i] = isz_i;
206:     }
207:   }

209:   /*  Now  post the sends */
210:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
211:   for (i=0; i<nrqs; ++i) {
212:     j    = pa[i];
213:     MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
214:   }

216:   /* No longer need the original indices*/
217:   for (i=0; i<imax; ++i) {
218:     ISRestoreIndices(is[i],idx+i);
219:   }
220:   PetscFree2(idx,n);

222:   for (i=0; i<imax; ++i) {
223:     ISDestroy(&is[i]);
224:   }

226:   /* Do Local work*/
227:   MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);

229:   /* Receive messages*/
230:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);
231:   if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}

233:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
234:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

236:   /* Phase 1 sends are complete - deallocate buffers */
237:   PetscFree4(outdat,ptr,tmp,ctr);
238:   PetscFree4(w1,w2,w3,w4);

240:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);
241:   PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);
242:   MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
243:   PetscFree(rbuf[0]);
244:   PetscFree(rbuf);


247:   /* Send the data back*/
248:   /* Do a global reduction to know the buffer space req for incoming messages*/
249:   {
250:     PetscMPIInt *rw1;

252:     PetscMalloc(size*sizeof(PetscMPIInt),&rw1);
253:     PetscMemzero(rw1,size*sizeof(PetscMPIInt));

255:     for (i=0; i<nrqr; ++i) {
256:       proc = recv_status[i].MPI_SOURCE;

258:       if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
259:       rw1[proc] = isz1[i];
260:     }
261:     PetscFree(onodes1);
262:     PetscFree(olengths1);

264:     /* Determine the number of messages to expect, their lengths, and their from-ids */
265:     PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
266:     PetscFree(rw1);
267:   }
268:   /* Now post the Irecvs corresponding to these messages */
269:   PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);

271:   /*  Now  post the sends */
272:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
273:   for (i=0; i<nrqr; ++i) {
274:     j    = recv_status[i].MPI_SOURCE;
275:     MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
276:   }

278:   /* receive work done on other processors*/
279:   {
280:     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
281:     PetscMPIInt idex;
282:     PetscBT     table_i;
283:     MPI_Status  *status2;

285:     PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
286:     for (i=0; i<nrqs; ++i) {
287:       MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
288:       /* Process the message*/
289:       rbuf2_i = rbuf2[idex];
290:       ct1     = 2*rbuf2_i[0]+1;
291:       jmax    = rbuf2[idex][0];
292:       for (j=1; j<=jmax; j++) {
293:         max     = rbuf2_i[2*j];
294:         is_no   = rbuf2_i[2*j-1];
295:         isz_i   = isz[is_no];
296:         data_i  = data[is_no];
297:         table_i = table[is_no];
298:         for (k=0; k<max; k++,ct1++) {
299:           row = rbuf2_i[ct1];
300:           if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row;
301:         }
302:         isz[is_no] = isz_i;
303:       }
304:     }

306:     if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
307:     PetscFree(status2);
308:   }

310:   for (i=0; i<imax; ++i) {
311:     ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
312:   }

314:   PetscFree(onodes2);
315:   PetscFree(olengths2);

317:   PetscFree(pa);
318:   PetscFree(rbuf2[0]);
319:   PetscFree(rbuf2);
320:   PetscFree(s_waits1);
321:   PetscFree(r_waits1);
322:   PetscFree(s_waits2);
323:   PetscFree(r_waits2);
324:   PetscFree5(table,data,isz,d_p,t_p);
325:   PetscFree(s_status);
326:   PetscFree(recv_status);
327:   PetscFree(xdata[0]);
328:   PetscFree(xdata);
329:   PetscFree(isz1);
330:   return(0);
331: }
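/*
   Summary of the communication in MatIncreaseOverlap_MPIAIJ_Once() above:
     1. each process sends the rows of its index sets that it does not own to the owning
        processes (tag1),
     2. the locally owned rows are expanded by MatIncreaseOverlap_MPIAIJ_Local(),
     3. the owners expand the received rows through their column connectivity in
        MatIncreaseOverlap_MPIAIJ_Receive() and send the result back (tag2),
     4. the replies are merged into the local tables and the enlarged index sets are
        recreated with ISCreateGeneral().
*/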

335: /*
336:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap_MPIAIJ_Once, to do
337:        the work on the local processor.

339:      Inputs:
340:       C      - the MPIAIJ matrix;
341:       imax   - total no of index sets processed at a time;
342:       table  - an array of PetscBT bit arrays, each of M bits (M = global no of rows).

344:      Output:
345:       isz    - array containing the count of the solution elements corresponding
346:                to each index set;
347:       data   - pointer to the solutions
348: */
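/*
   In effect, for every row r already listed in data[i] this routine appends the global
   column indices of row r, taken from both the diagonal block (A) and the off-diagonal
   block (B, translated through garray), using table[i] to skip entries already present.
*/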
349: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
350: {
351:   Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
352:   Mat        A  = c->A,B = c->B;
353:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
354:   PetscInt   start,end,val,max,rstart,cstart,*ai,*aj;
355:   PetscInt   *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
356:   PetscBT    table_i;

359:   rstart = C->rmap->rstart;
360:   cstart = C->cmap->rstart;
361:   ai     = a->i;
362:   aj     = a->j;
363:   bi     = b->i;
364:   bj     = b->j;
365:   garray = c->garray;


368:   for (i=0; i<imax; i++) {
369:     data_i  = data[i];
370:     table_i = table[i];
371:     isz_i   = isz[i];
372:     for (j=0,max=isz[i]; j<max; j++) {
373:       row   = data_i[j] - rstart;
374:       start = ai[row];
375:       end   = ai[row+1];
376:       for (k=start; k<end; k++) { /* Amat */
377:         val = aj[k] + cstart;
378:         if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
379:       }
380:       start = bi[row];
381:       end   = bi[row+1];
382:       for (k=start; k<end; k++) { /* Bmat */
383:         val = garray[bj[k]];
384:         if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
385:       }
386:     }
387:     isz[i] = isz_i;
388:   }
389:   return(0);
390: }

394: /*
395:       MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
396:          and return the output

398:          Input:
399:            C    - the matrix
400:            nrqr - no of messages being processed.
401:            rbuf - an array of pointers to the received requests

403:          Output:
404:            xdata - array of messages to be sent back
405:            isz1  - size of each message

407:   For better efficiency perhaps we should malloc each xdata[i] separately;
408: then if a realloc is required we need only copy the data for that one row
409: rather than all previous rows, as is done now where a single large chunk of
410: memory is used.

412: */
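/*
   The growth strategy currently used below (sketch): all replies share one buffer anchored
   at xdata[0]; whenever the estimate is exceeded the buffer is reallocated to 1.5 times its
   size, the old contents are copied over, and the xdata[1..i] pointers are rebuilt from the
   per-message sizes in isz1[].
*/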
413: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
414: {
415:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
416:   Mat            A  = c->A,B = c->B;
417:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
419:   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
420:   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
421:   PetscInt       val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
422:   PetscInt       *rbuf_i,kmax,rbuf_0;
423:   PetscBT        xtable;

426:   m      = C->rmap->N;
427:   rstart = C->rmap->rstart;
428:   cstart = C->cmap->rstart;
429:   ai     = a->i;
430:   aj     = a->j;
431:   bi     = b->i;
432:   bj     = b->j;
433:   garray = c->garray;


436:   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
437:     rbuf_i =  rbuf[i];
438:     rbuf_0 =  rbuf_i[0];
439:     ct    += rbuf_0;
440:     for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j];
441:   }

443:   if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
444:   else max1 = 1;
445:   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
446:   PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);
447:   ++no_malloc;
448:   PetscBTCreate(m,&xtable);
449:   PetscMemzero(isz1,nrqr*sizeof(PetscInt));

451:   ct3 = 0;
452:   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
453:     rbuf_i =  rbuf[i];
454:     rbuf_0 =  rbuf_i[0];
455:     ct1    =  2*rbuf_0+1;
456:     ct2    =  ct1;
457:     ct3   += ct1;
458:     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
459:       PetscBTMemzero(m,xtable);
460:       oct2 = ct2;
461:       kmax = rbuf_i[2*j];
462:       for (k=0; k<kmax; k++,ct1++) {
463:         row = rbuf_i[ct1];
464:         if (!PetscBTLookupSet(xtable,row)) {
465:           if (!(ct3 < mem_estimate)) {
466:             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
467:             PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
468:             PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
469:             PetscFree(xdata[0]);
470:             xdata[0]     = tmp;
471:             mem_estimate = new_estimate; ++no_malloc;
472:             for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
473:           }
474:           xdata[i][ct2++] = row;
475:           ct3++;
476:         }
477:       }
478:       for (k=oct2,max2=ct2; k<max2; k++) {
479:         row   = xdata[i][k] - rstart;
480:         start = ai[row];
481:         end   = ai[row+1];
482:         for (l=start; l<end; l++) {
483:           val = aj[l] + cstart;
484:           if (!PetscBTLookupSet(xtable,val)) {
485:             if (!(ct3 < mem_estimate)) {
486:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
487:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
488:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
489:               PetscFree(xdata[0]);
490:               xdata[0]     = tmp;
491:               mem_estimate = new_estimate; ++no_malloc;
492:               for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
493:             }
494:             xdata[i][ct2++] = val;
495:             ct3++;
496:           }
497:         }
498:         start = bi[row];
499:         end   = bi[row+1];
500:         for (l=start; l<end; l++) {
501:           val = garray[bj[l]];
502:           if (!PetscBTLookupSet(xtable,val)) {
503:             if (!(ct3 < mem_estimate)) {
504:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
505:               PetscMalloc(new_estimate*sizeof(PetscInt),&tmp);
506:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
507:               PetscFree(xdata[0]);
508:               xdata[0]     = tmp;
509:               mem_estimate = new_estimate; ++no_malloc;
510:               for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
511:             }
512:             xdata[i][ct2++] = val;
513:             ct3++;
514:           }
515:         }
516:       }
517:       /* Update the header*/
518:       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
519:       xdata[i][2*j-1] = rbuf_i[2*j-1];
520:     }
521:     xdata[i][0] = rbuf_0;
522:     xdata[i+1]  = xdata[i] + ct2;
523:     isz1[i]     = ct2; /* size of each message */
524:   }
525:   PetscBTDestroy(&xtable);
526:   PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
527:   return(0);
528: }
529: /* -------------------------------------------------------------------------*/
530: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
531: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
532: /*
533:     Every processor gets the entire matrix
534: */
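/*
    Strategy (summary): the row lengths, the column indices, and, when flag == MAT_GET_VALUES,
    the numerical values are each gathered onto every process with MPI_Allgatherv, and the
    result is assembled into one sequential AIJ matrix per process.
*/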
537: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
538: {
539:   Mat            B;
540:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
541:   Mat_SeqAIJ     *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
543:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
544:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
545:   PetscInt       m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
546:   MatScalar      *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;

549:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
550:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);

552:   if (scall == MAT_INITIAL_MATRIX) {
553:     /* ----------------------------------------------------------------
554:          Tell every processor the number of nonzeros per row
555:     */
556:     PetscMalloc(A->rmap->N*sizeof(PetscInt),&lens);
557:     for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
558:       lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
559:     }
560:     sendcount = A->rmap->rend - A->rmap->rstart;
561:     PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
562:     for (i=0; i<size; i++) {
563:       recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
564:       displs[i]     = A->rmap->range[i];
565:     }
566: #if defined(PETSC_HAVE_MPI_IN_PLACE)
567:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
568: #else
569:     MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
570: #endif
571:     /* ---------------------------------------------------------------
572:          Create the sequential matrix of the same type as the local block diagonal
573:     */
574:     MatCreate(PETSC_COMM_SELF,&B);
575:     MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
576:     MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
577:     MatSetType(B,((PetscObject)a->A)->type_name);
578:     MatSeqAIJSetPreallocation(B,0,lens);
579:     PetscMalloc(sizeof(Mat),Bin);
580:     **Bin = B;
581:     b     = (Mat_SeqAIJ*)B->data;

583:     /*--------------------------------------------------------------------
584:        Copy my part of matrix column indices over
585:     */
586:     sendcount  = ad->nz + bd->nz;
587:     jsendbuf   = b->j + b->i[rstarts[rank]];
588:     a_jsendbuf = ad->j;
589:     b_jsendbuf = bd->j;
590:     n          = A->rmap->rend - A->rmap->rstart;
591:     cnt        = 0;
592:     for (i=0; i<n; i++) {

594:       /* put in lower diagonal portion */
595:       m = bd->i[i+1] - bd->i[i];
596:       while (m > 0) {
597:         /* is it above diagonal (in bd (compressed) numbering) */
598:         if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
599:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
600:         m--;
601:       }

603:       /* put in diagonal portion */
604:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
605:         jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
606:       }

608:       /* put in upper diagonal portion */
609:       while (m-- > 0) {
610:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
611:       }
612:     }
613:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

615:     /*--------------------------------------------------------------------
616:        Gather all column indices to all processors
617:     */
618:     for (i=0; i<size; i++) {
619:       recvcounts[i] = 0;
620:       for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
621:         recvcounts[i] += lens[j];
622:       }
623:     }
624:     displs[0] = 0;
625:     for (i=1; i<size; i++) {
626:       displs[i] = displs[i-1] + recvcounts[i-1];
627:     }
628: #if defined(PETSC_HAVE_MPI_IN_PLACE)
629:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
630: #else
631:     MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
632: #endif
633:     /*--------------------------------------------------------------------
634:         Assemble the matrix into usable form (note numerical values not yet set)
635:     */
636:     /* set the b->ilen (length of each row) values */
637:     PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
638:     /* set the b->i indices */
639:     b->i[0] = 0;
640:     for (i=1; i<=A->rmap->N; i++) {
641:       b->i[i] = b->i[i-1] + lens[i-1];
642:     }
643:     PetscFree(lens);
644:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
645:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

647:   } else {
648:     B = **Bin;
649:     b = (Mat_SeqAIJ*)B->data;
650:   }

652:   /*--------------------------------------------------------------------
653:        Copy my part of matrix numerical values into the values location
654:   */
655:   if (flag == MAT_GET_VALUES) {
656:     sendcount = ad->nz + bd->nz;
657:     sendbuf   = b->a + b->i[rstarts[rank]];
658:     a_sendbuf = ad->a;
659:     b_sendbuf = bd->a;
660:     b_sendj   = bd->j;
661:     n         = A->rmap->rend - A->rmap->rstart;
662:     cnt       = 0;
663:     for (i=0; i<n; i++) {

665:       /* put in lower diagonal portion */
666:       m = bd->i[i+1] - bd->i[i];
667:       while (m > 0) {
668:         /* is it above diagonal (in bd (compressed) numbering) */
669:         if (garray[*b_sendj] > A->rmap->rstart + i) break;
670:         sendbuf[cnt++] = *b_sendbuf++;
671:         m--;
672:         b_sendj++;
673:       }

675:       /* put in diagonal portion */
676:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
677:         sendbuf[cnt++] = *a_sendbuf++;
678:       }

680:       /* put in upper diagonal portion */
681:       while (m-- > 0) {
682:         sendbuf[cnt++] = *b_sendbuf++;
683:         b_sendj++;
684:       }
685:     }
686:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

688:     /* -----------------------------------------------------------------
689:        Gather all numerical values to all processors
690:     */
691:     if (!recvcounts) {
692:       PetscMalloc2(size,PetscMPIInt,&recvcounts,size,PetscMPIInt,&displs);
693:     }
694:     for (i=0; i<size; i++) {
695:       recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
696:     }
697:     displs[0] = 0;
698:     for (i=1; i<size; i++) {
699:       displs[i] = displs[i-1] + recvcounts[i-1];
700:     }
701:     recvbuf = b->a;
702: #if defined(PETSC_HAVE_MPI_IN_PLACE)
703:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
704: #else
705:     MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
706: #endif
707:   }  /* endof (flag == MAT_GET_VALUES) */
708:   PetscFree2(recvcounts,displs);

710:   if (A->symmetric) {
711:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
712:   } else if (A->hermitian) {
713:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
714:   } else if (A->structurally_symmetric) {
715:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
716:   }
717:   return(0);
718: }



724: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
725: {
727:   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
728:   PetscBool      rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns;

731:   /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */
732:   /* It would make sense to error out in MatGetSubMatrices_MPIAIJ_Local(), the most impl-specific level.
733:      However, there are more careful users of MatGetSubMatrices_MPIAIJ_Local() -- MatPermute_MPIAIJ() -- that
734:      take care to order the result correctly by assembling it with MatSetValues() (after preallocating).
735:    */
736:   for (i = 0; i < ismax; ++i) {
737:     PetscBool sorted;
738:     ISSorted(iscol[i], &sorted);
739:     if (!sorted) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_SUP, "Column index set %D not sorted", i);
740:   }

742:   /*
743:        Check for special case: each processor gets entire matrix
744:   */
745:   if (ismax == 1 && C->rmap->N == C->cmap->N) {
746:     ISIdentity(*isrow,&rowflag);
747:     ISIdentity(*iscol,&colflag);
748:     ISGetLocalSize(*isrow,&nrow);
749:     ISGetLocalSize(*iscol,&ncol);
750:     if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
751:       wantallmatrix = PETSC_TRUE;

753:       PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);
754:     }
755:   }
756:   MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPIU_BOOL,MPI_MIN,PetscObjectComm((PetscObject)C));
757:   if (twantallmatrix) {
758:     MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
759:     return(0);
760:   }

762:   /* Allocate memory to hold all the submatrices */
763:   if (scall != MAT_REUSE_MATRIX) {
764:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
765:   }

767:   /* Check for special case: each processor gets entire matrix columns */
768:   PetscMalloc((ismax+1)*sizeof(PetscBool),&allcolumns);
769:   for (i=0; i<ismax; i++) {
770:     ISIdentity(iscol[i],&colflag);
771:     ISGetLocalSize(iscol[i],&ncol);
772:     if (colflag && ncol == C->cmap->N) {
773:       allcolumns[i] = PETSC_TRUE;
774:     } else {
775:       allcolumns[i] = PETSC_FALSE;
776:     }
777:   }

779:   /* Determine the number of stages through which submatrices are done */
780:   nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));

782:   /*
783:      Each stage will extract nmax submatrices.
784:      nmax is determined by the matrix column dimension.
785:      If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
786:   */
787:   if (!nmax) nmax = 1;
788:   nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

790:   /* Make sure every processor loops through the nstages */
791:   MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));

793:   for (i=0,pos=0; i<nstages; i++) {
794:     if (pos+nmax <= ismax) max_no = nmax;
795:     else if (pos == ismax) max_no = 0;
796:     else                   max_no = ismax-pos;
797:     MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,allcolumns+pos,*submat+pos);
798:     pos += max_no;
799:   }

801:   PetscFree(allcolumns);
802:   return(0);
803: }
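/*
   Illustrative call sequence (a minimal sketch, not taken from this file): the routine above
   implements the MPIAIJ case of the public MatGetSubMatrices() interface, e.g.

       IS  isrow[2],iscol[2];
       Mat *submat;
       ... create the (sorted) row and column index sets ...
       MatGetSubMatrices(C,2,isrow,iscol,MAT_INITIAL_MATRIX,&submat);
       ... each submat[i] is a sequential matrix holding the rows/columns
           selected by this process's isrow[i]/iscol[i] ...
       MatDestroyMatrices(2,&submat);
*/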

805: /* -------------------------------------------------------------------------*/
808: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allcolumns,Mat *submats)
809: {
810:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
811:   Mat            A  = c->A;
812:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
813:   const PetscInt **icol,**irow;
814:   PetscInt       *nrow,*ncol,start;
816:   PetscMPIInt    rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
817:   PetscInt       **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
818:   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
819:   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
820:   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
821: #if defined(PETSC_USE_CTABLE)
822:   PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i;
823: #else
824:   PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i;
825: #endif
826:   const PetscInt *irow_i;
827:   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
828:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
829:   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
830:   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
831:   MPI_Status     *r_status3,*r_status4,*s_status4;
832:   MPI_Comm       comm;
833:   PetscScalar    **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
834:   PetscMPIInt    *onodes1,*olengths1;
835:   PetscMPIInt    idex,idex2,end;

838:   PetscObjectGetComm((PetscObject)C,&comm);
839:   tag0 = ((PetscObject)C)->tag;
840:   size = c->size;
841:   rank = c->rank;

843:   /* Get some new tags to keep the communication clean */
844:   PetscObjectGetNewTag((PetscObject)C,&tag1);
845:   PetscObjectGetNewTag((PetscObject)C,&tag2);
846:   PetscObjectGetNewTag((PetscObject)C,&tag3);

848:   PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);

850:   for (i=0; i<ismax; i++) {
851:     ISGetIndices(isrow[i],&irow[i]);
852:     ISGetLocalSize(isrow[i],&nrow[i]);
853:     if (allcolumns[i]) {
854:       icol[i] = NULL;
855:       ncol[i] = C->cmap->N;
856:     } else {
857:       ISGetIndices(iscol[i],&icol[i]);
858:       ISGetLocalSize(iscol[i],&ncol[i]);
859:     }
860:   }

862:   /* evaluate communication - mesg to who, length of mesg, and buffer space
863:      required. Based on this, buffers are allocated, and data copied into them*/
864:   PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscMPIInt,&w3,size,PetscMPIInt,&w4);   /* mesg size */
865:   PetscMemzero(w1,size*sizeof(PetscMPIInt));   /* initialize work vector*/
866:   PetscMemzero(w2,size*sizeof(PetscMPIInt));   /* initialize work vector*/
867:   PetscMemzero(w3,size*sizeof(PetscMPIInt));   /* initialize work vector*/
868:   for (i=0; i<ismax; i++) {
869:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
870:     jmax   = nrow[i];
871:     irow_i = irow[i];
872:     for (j=0; j<jmax; j++) {
873:       l   = 0;
874:       row = irow_i[j];
875:       while (row >= C->rmap->range[l+1]) l++;
876:       proc = l;
877:       w4[proc]++;
878:     }
879:     for (j=0; j<size; j++) {
880:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
881:     }
882:   }

884:   nrqs     = 0;              /* no of outgoing messages */
885:   msz      = 0;              /* total mesg length (for all procs) */
886:   w1[rank] = 0;              /* no mesg sent to self */
887:   w3[rank] = 0;
888:   for (i=0; i<size; i++) {
889:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
890:   }
891:   PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa); /*(proc -array)*/
892:   for (i=0,j=0; i<size; i++) {
893:     if (w1[i]) { pa[j] = i; j++; }
894:   }

896:   /* Each message would have a header = 1 + 2*(no of IS) + data */
897:   for (i=0; i<nrqs; i++) {
898:     j      = pa[i];
899:     w1[j] += w2[j] + 2* w3[j];
900:     msz   += w1[j];
901:   }
902:   PetscInfo2(0,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);

904:   /* Determine the number of messages to expect, their lengths, and their from-ids */
905:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
906:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

908:   /* Now post the Irecvs corresponding to these messages */
909:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);

911:   PetscFree(onodes1);
912:   PetscFree(olengths1);

914:   /* Allocate Memory for outgoing messages */
915:   PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);
916:   PetscMemzero(sbuf1,size*sizeof(PetscInt*));
917:   PetscMemzero(ptr,size*sizeof(PetscInt*));

919:   {
920:     PetscInt *iptr = tmp,ict = 0;
921:     for (i=0; i<nrqs; i++) {
922:       j        = pa[i];
923:       iptr    += ict;
924:       sbuf1[j] = iptr;
925:       ict      = w1[j];
926:     }
927:   }

929:   /* Form the outgoing messages */
930:   /* Initialize the header space */
931:   for (i=0; i<nrqs; i++) {
932:     j           = pa[i];
933:     sbuf1[j][0] = 0;
934:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
935:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
936:   }

938:   /* Parse the isrow and copy data into outbuf */
939:   for (i=0; i<ismax; i++) {
940:     PetscMemzero(ctr,size*sizeof(PetscInt));
941:     irow_i = irow[i];
942:     jmax   = nrow[i];
943:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
944:       l   = 0;
945:       row = irow_i[j];
946:       while (row >= C->rmap->range[l+1]) l++;
947:       proc = l;
948:       if (proc != rank) { /* copy to the outgoing buf*/
949:         ctr[proc]++;
950:         *ptr[proc] = row;
951:         ptr[proc]++;
952:       }
953:     }
954:     /* Update the headers for the current IS */
955:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
956:       if ((ctr_j = ctr[j])) {
957:         sbuf1_j        = sbuf1[j];
958:         k              = ++sbuf1_j[0];
959:         sbuf1_j[2*k]   = ctr_j;
960:         sbuf1_j[2*k-1] = i;
961:       }
962:     }
963:   }

965:   /*  Now  post the sends */
966:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
967:   for (i=0; i<nrqs; ++i) {
968:     j    = pa[i];
969:     MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
970:   }

972:   /* Post Receives to capture the buffer size */
973:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
974:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);
975:   rbuf2[0] = tmp + msz;
976:   for (i=1; i<nrqs; ++i) {
977:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
978:   }
979:   for (i=0; i<nrqs; ++i) {
980:     j    = pa[i];
981:     MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
982:   }

984:   /* Send to other procs the buf size they should allocate */


987:   /* Receive messages*/
988:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
989:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
990:   PetscMalloc3(nrqr,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);
991:   {
992:     Mat_SeqAIJ *sA  = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
993:     PetscInt   *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
994:     PetscInt   *sbuf2_i;

996:     for (i=0; i<nrqr; ++i) {
997:       MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);

999:       req_size[idex] = 0;
1000:       rbuf1_i        = rbuf1[idex];
1001:       start          = 2*rbuf1_i[0] + 1;
1002:       MPI_Get_count(r_status1+i,MPIU_INT,&end);
1003:       PetscMalloc((end+1)*sizeof(PetscInt),&sbuf2[idex]);
1004:       sbuf2_i        = sbuf2[idex];
1005:       for (j=start; j<end; j++) {
1006:         id              = rbuf1_i[j] - rstart;
1007:         ncols           = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
1008:         sbuf2_i[j]      = ncols;
1009:         req_size[idex] += ncols;
1010:       }
1011:       req_source[idex] = r_status1[i].MPI_SOURCE;
1012:       /* form the header */
1013:       sbuf2_i[0] = req_size[idex];
1014:       for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j];

1016:       MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
1017:     }
1018:   }
1019:   PetscFree(r_status1);
1020:   PetscFree(r_waits1);

1022:   /*  recv buffer sizes */
1023:   /* Receive messages*/

1025:   PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);
1026:   PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf4);
1027:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
1028:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
1029:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

1031:   for (i=0; i<nrqs; ++i) {
1032:     MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1033:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscInt),&rbuf3[idex]);
1034:     PetscMalloc((rbuf2[idex][0]+1)*sizeof(PetscScalar),&rbuf4[idex]);
1035:     MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1036:     MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1037:   }
1038:   PetscFree(r_status2);
1039:   PetscFree(r_waits2);

1041:   /* Wait on sends1 and sends2 */
1042:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
1043:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

1045:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1046:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1047:   PetscFree(s_status1);
1048:   PetscFree(s_status2);
1049:   PetscFree(s_waits1);
1050:   PetscFree(s_waits2);

1052:   /* Now allocate buffers for a->j, and send them off */
1053:   PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);
1054:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1055:   PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);
1056:   for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];

1058:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
1059:   {
1060:     PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1061:     PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1062:     PetscInt cend = C->cmap->rend;
1063:     PetscInt *a_j = a->j,*b_j = b->j,ctmp;

1065:     for (i=0; i<nrqr; i++) {
1066:       rbuf1_i   = rbuf1[i];
1067:       sbuf_aj_i = sbuf_aj[i];
1068:       ct1       = 2*rbuf1_i[0] + 1;
1069:       ct2       = 0;
1070:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1071:         kmax = rbuf1[i][2*j];
1072:         for (k=0; k<kmax; k++,ct1++) {
1073:           row    = rbuf1_i[ct1] - rstart;
1074:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1075:           ncols  = nzA + nzB;
1076:           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];

1078:           /* load the column indices for this row into cols*/
1079:           cols = sbuf_aj_i + ct2;

1081:           lwrite = 0;
1082:           for (l=0; l<nzB; l++) {
1083:             if ((ctmp = bmap[cworkB[l]]) < cstart) cols[lwrite++] = ctmp;
1084:           }
1085:           for (l=0; l<nzA; l++) cols[lwrite++] = cstart + cworkA[l];
1086:           for (l=0; l<nzB; l++) {
1087:             if ((ctmp = bmap[cworkB[l]]) >= cend) cols[lwrite++] = ctmp;
1088:           }

1090:           ct2 += ncols;
1091:         }
1092:       }
1093:       MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1094:     }
1095:   }
1096:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
1097:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

1099:   /* Allocate buffers for a->a, and send them off */
1100:   PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf_aa);
1101:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1102:   PetscMalloc((j+1)*sizeof(PetscScalar),&sbuf_aa[0]);
1103:   for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];

1105:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
1106:   {
1107:     PetscInt    nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1108:     PetscInt    cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1109:     PetscInt    cend   = C->cmap->rend;
1110:     PetscInt    *b_j   = b->j;
1111:     PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;

1113:     for (i=0; i<nrqr; i++) {
1114:       rbuf1_i   = rbuf1[i];
1115:       sbuf_aa_i = sbuf_aa[i];
1116:       ct1       = 2*rbuf1_i[0]+1;
1117:       ct2       = 0;
1118:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1119:         kmax = rbuf1_i[2*j];
1120:         for (k=0; k<kmax; k++,ct1++) {
1121:           row    = rbuf1_i[ct1] - rstart;
1122:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1123:           ncols  = nzA + nzB;
1124:           cworkB = b_j + b_i[row];
1125:           vworkA = a_a + a_i[row];
1126:           vworkB = b_a + b_i[row];

1128:           /* load the column values for this row into vals*/
1129:           vals = sbuf_aa_i+ct2;

1131:           lwrite = 0;
1132:           for (l=0; l<nzB; l++) {
1133:             if ((bmap[cworkB[l]]) < cstart) vals[lwrite++] = vworkB[l];
1134:           }
1135:           for (l=0; l<nzA; l++) vals[lwrite++] = vworkA[l];
1136:           for (l=0; l<nzB; l++) {
1137:             if ((bmap[cworkB[l]]) >= cend) vals[lwrite++] = vworkB[l];
1138:           }

1140:           ct2 += ncols;
1141:         }
1142:       }
1143:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1144:     }
1145:   }
1146:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
1147:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
1148:   PetscFree(rbuf1[0]);
1149:   PetscFree(rbuf1);

1151:   /* Form the matrix */
1152:   /* create col map: global col of C -> local col of submatrices */
1153:   {
1154:     const PetscInt *icol_i;
1155: #if defined(PETSC_USE_CTABLE)
1156:     PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);
1157:     for (i=0; i<ismax; i++) {
1158:       if (!allcolumns[i]) {
1159:         PetscTableCreate(ncol[i]+1,C->cmap->N+1,&cmap[i]);

1161:         jmax   = ncol[i];
1162:         icol_i = icol[i];
1163:         cmap_i = cmap[i];
1164:         for (j=0; j<jmax; j++) {
1165:           PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);
1166:         }
1167:       } else {
1168:         cmap[i] = NULL;
1169:       }
1170:     }
1171: #else
1172:     PetscMalloc(ismax*sizeof(PetscInt*),&cmap);
1173:     for (i=0; i<ismax; i++) {
1174:       if (!allcolumns[i]) {
1175:         PetscMalloc(C->cmap->N*sizeof(PetscInt),&cmap[i]);
1176:         PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));
1177:         jmax   = ncol[i];
1178:         icol_i = icol[i];
1179:         cmap_i = cmap[i];
1180:         for (j=0; j<jmax; j++) {
1181:           cmap_i[icol_i[j]] = j+1;
1182:         }
1183:       } else {
1184:         cmap[i] = NULL;
1185:       }
1186:     }
1187: #endif
1188:   }
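  /* Note: both variants store (local column + 1), so a lookup result of 0 below means
     that the global column is not part of submatrix i. */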

1190:   /* Create lens which is required for MatCreate... */
1191:   for (i=0,j=0; i<ismax; i++) j += nrow[i];
1192:   PetscMalloc(ismax*sizeof(PetscInt*),&lens);
1193:   if (ismax) {
1194:     PetscMalloc(j*sizeof(PetscInt),&lens[0]);
1195:     PetscMemzero(lens[0],j*sizeof(PetscInt));
1196:   }
1197:   for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1];

1199:   /* Update lens from local data */
1200:   for (i=0; i<ismax; i++) {
1201:     jmax = nrow[i];
1202:     if (!allcolumns[i]) cmap_i = cmap[i];
1203:     irow_i = irow[i];
1204:     lens_i = lens[i];
1205:     for (j=0; j<jmax; j++) {
1206:       l   = 0;
1207:       row = irow_i[j];
1208:       while (row >= C->rmap->range[l+1]) l++;
1209:       proc = l;
1210:       if (proc == rank) {
1211:         MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1212:         if (!allcolumns[i]) {
1213:           for (k=0; k<ncols; k++) {
1214: #if defined(PETSC_USE_CTABLE)
1215:             PetscTableFind(cmap_i,cols[k]+1,&tcol);
1216: #else
1217:             tcol = cmap_i[cols[k]];
1218: #endif
1219:             if (tcol) lens_i[j]++;
1220:           }
1221:         } else { /* allcolumns */
1222:           lens_i[j] = ncols;
1223:         }
1224:         MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1225:       }
1226:     }
1227:   }

1229:   /* Create row map: global row of C -> local row of submatrices */
1230: #if defined(PETSC_USE_CTABLE)
1231:   PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);
1232:   for (i=0; i<ismax; i++) {
1233:     PetscTableCreate(nrow[i]+1,C->rmap->N+1,&rmap[i]);
1234:     rmap_i = rmap[i];
1235:     irow_i = irow[i];
1236:     jmax   = nrow[i];
1237:     for (j=0; j<jmax; j++) {
1238:       PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);
1239:     }
1240:   }
1241: #else
1242:   PetscMalloc(ismax*sizeof(PetscInt*),&rmap);
1243:   if (ismax) {
1244:     PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);
1245:     PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1246:   }
1247:   for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
1248:   for (i=0; i<ismax; i++) {
1249:     rmap_i = rmap[i];
1250:     irow_i = irow[i];
1251:     jmax   = nrow[i];
1252:     for (j=0; j<jmax; j++) {
1253:       rmap_i[irow_i[j]] = j;
1254:     }
1255:   }
1256: #endif

1258:   /* Update lens from offproc data */
1259:   {
1260:     PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;

1262:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1263:       MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1264:       idex    = pa[idex2];
1265:       sbuf1_i = sbuf1[idex];
1266:       jmax    = sbuf1_i[0];
1267:       ct1     = 2*jmax+1;
1268:       ct2     = 0;
1269:       rbuf2_i = rbuf2[idex2];
1270:       rbuf3_i = rbuf3[idex2];
1271:       for (j=1; j<=jmax; j++) {
1272:         is_no  = sbuf1_i[2*j-1];
1273:         max1   = sbuf1_i[2*j];
1274:         lens_i = lens[is_no];
1275:         if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1276:         rmap_i = rmap[is_no];
1277:         for (k=0; k<max1; k++,ct1++) {
1278: #if defined(PETSC_USE_CTABLE)
1279:           PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1280:           row--;
1281:           if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table");
1282: #else
1283:           row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1284: #endif
1285:           max2 = rbuf2_i[ct1];
1286:           for (l=0; l<max2; l++,ct2++) {
1287:             if (!allcolumns[is_no]) {
1288: #if defined(PETSC_USE_CTABLE)
1289:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1290: #else
1291:               tcol = cmap_i[rbuf3_i[ct2]];
1292: #endif
1293:               if (tcol) lens_i[row]++;
1294:             } else { /* allcolumns */
1295:               lens_i[row]++; /* lens_i[row] += max2 ? */
1296:             }
1297:           }
1298:         }
1299:       }
1300:     }
1301:   }
1302:   PetscFree(r_status3);
1303:   PetscFree(r_waits3);
1304:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1305:   PetscFree(s_status3);
1306:   PetscFree(s_waits3);

1308:   /* Create the submatrices */
1309:   if (scall == MAT_REUSE_MATRIX) {
1310:     PetscBool flag;

1312:     /*
1313:         Assumes new rows are the same length as the old rows, hence bug!
1314:     */
1315:     for (i=0; i<ismax; i++) {
1316:       mat = (Mat_SeqAIJ*)(submats[i]->data);
1317:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1318:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1319:       if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1320:       /* Initialize matrix as if empty */
1321:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));

1323:       submats[i]->factortype = C->factortype;
1324:     }
1325:   } else {
1326:     for (i=0; i<ismax; i++) {
1327:       PetscInt rbs,cbs;
1328:       ISGetBlockSize(isrow[i],&rbs);
1329:       ISGetBlockSize(iscol[i],&cbs);

1331:       MatCreate(PETSC_COMM_SELF,submats+i);
1332:       MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);

1334:       MatSetBlockSizes(submats[i],rbs,cbs);
1335:       MatSetType(submats[i],((PetscObject)A)->type_name);
1336:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1337:     }
1338:   }

1340:   /* Assemble the matrices */
1341:   /* First assemble the local rows */
1342:   {
1343:     PetscInt    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1344:     PetscScalar *imat_a;

1346:     for (i=0; i<ismax; i++) {
1347:       mat       = (Mat_SeqAIJ*)submats[i]->data;
1348:       imat_ilen = mat->ilen;
1349:       imat_j    = mat->j;
1350:       imat_i    = mat->i;
1351:       imat_a    = mat->a;

1353:       if (!allcolumns[i]) cmap_i = cmap[i];
1354:       rmap_i = rmap[i];
1355:       irow_i = irow[i];
1356:       jmax   = nrow[i];
1357:       for (j=0; j<jmax; j++) {
1358:         l   = 0;
1359:         row = irow_i[j];
1360:         while (row >= C->rmap->range[l+1]) l++;
1361:         proc = l;
1362:         if (proc == rank) {
1363:           old_row = row;
1364: #if defined(PETSC_USE_CTABLE)
1365:           PetscTableFind(rmap_i,row+1,&row);
1366:           row--;
1367: #else
1368:           row = rmap_i[row];
1369: #endif
1370:           ilen_row = imat_ilen[row];
1371:           MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1372:           mat_i    = imat_i[row];
1373:           mat_a    = imat_a + mat_i;
1374:           mat_j    = imat_j + mat_i;
1375:           if (!allcolumns[i]) {
1376:             for (k=0; k<ncols; k++) {
1377: #if defined(PETSC_USE_CTABLE)
1378:               PetscTableFind(cmap_i,cols[k]+1,&tcol);
1379: #else
1380:               tcol = cmap_i[cols[k]];
1381: #endif
1382:               if (tcol) {
1383:                 *mat_j++ = tcol - 1;
1384:                 *mat_a++ = vals[k];
1385:                 ilen_row++;
1386:               }
1387:             }
1388:           } else { /* allcolumns */
1389:             for (k=0; k<ncols; k++) {
1390:               *mat_j++ = cols[k];  /* global col index! */
1391:               *mat_a++ = vals[k];
1392:               ilen_row++;
1393:             }
1394:           }
1395:           MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);

1397:           imat_ilen[row] = ilen_row;
1398:         }
1399:       }
1400:     }
1401:   }

1403:   /*   Now assemble the off proc rows*/
1404:   {
1405:     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1406:     PetscInt    *imat_j,*imat_i;
1407:     PetscScalar *imat_a,*rbuf4_i;

1409:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1410:       MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1411:       idex    = pa[idex2];
1412:       sbuf1_i = sbuf1[idex];
1413:       jmax    = sbuf1_i[0];
1414:       ct1     = 2*jmax + 1;
1415:       ct2     = 0;
1416:       rbuf2_i = rbuf2[idex2];
1417:       rbuf3_i = rbuf3[idex2];
1418:       rbuf4_i = rbuf4[idex2];
1419:       for (j=1; j<=jmax; j++) {
1420:         is_no     = sbuf1_i[2*j-1];
1421:         rmap_i    = rmap[is_no];
1422:         if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1423:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
1424:         imat_ilen = mat->ilen;
1425:         imat_j    = mat->j;
1426:         imat_i    = mat->i;
1427:         imat_a    = mat->a;
1428:         max1      = sbuf1_i[2*j];
1429:         for (k=0; k<max1; k++,ct1++) {
1430:           row = sbuf1_i[ct1];
1431: #if defined(PETSC_USE_CTABLE)
1432:           PetscTableFind(rmap_i,row+1,&row);
1433:           row--;
1434: #else
1435:           row = rmap_i[row];
1436: #endif
1437:           ilen  = imat_ilen[row];
1438:           mat_i = imat_i[row];
1439:           mat_a = imat_a + mat_i;
1440:           mat_j = imat_j + mat_i;
1441:           max2  = rbuf2_i[ct1];
1442:           if (!allcolumns[is_no]) {
1443:             for (l=0; l<max2; l++,ct2++) {

1445: #if defined(PETSC_USE_CTABLE)
1446:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1447: #else
1448:               tcol = cmap_i[rbuf3_i[ct2]];
1449: #endif
1450:               if (tcol) {
1451:                 *mat_j++ = tcol - 1;
1452:                 *mat_a++ = rbuf4_i[ct2];
1453:                 ilen++;
1454:               }
1455:             }
1456:           } else { /* allcolumns */
1457:             for (l=0; l<max2; l++,ct2++) {
1458:               *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */
1459:               *mat_a++ = rbuf4_i[ct2];
1460:               ilen++;
1461:             }
1462:           }
1463:           imat_ilen[row] = ilen;
1464:         }
1465:       }
1466:     }
1467:   }

1469:   PetscFree(r_status4);
1470:   PetscFree(r_waits4);
1471:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1472:   PetscFree(s_waits4);
1473:   PetscFree(s_status4);

1475:   /* Restore the indices */
1476:   for (i=0; i<ismax; i++) {
1477:     ISRestoreIndices(isrow[i],irow+i);
1478:     if (!allcolumns[i]) {
1479:       ISRestoreIndices(iscol[i],icol+i);
1480:     }
1481:   }

1483:   /* Destroy allocated memory */
1484:   PetscFree4(irow,icol,nrow,ncol);
1485:   PetscFree4(w1,w2,w3,w4);
1486:   PetscFree(pa);

1488:   PetscFree4(sbuf1,ptr,tmp,ctr);
1489:   PetscFree(rbuf2);
1490:   for (i=0; i<nrqr; ++i) {
1491:     PetscFree(sbuf2[i]);
1492:   }
1493:   for (i=0; i<nrqs; ++i) {
1494:     PetscFree(rbuf3[i]);
1495:     PetscFree(rbuf4[i]);
1496:   }

1498:   PetscFree3(sbuf2,req_size,req_source);
1499:   PetscFree(rbuf3);
1500:   PetscFree(rbuf4);
1501:   PetscFree(sbuf_aj[0]);
1502:   PetscFree(sbuf_aj);
1503:   PetscFree(sbuf_aa[0]);
1504:   PetscFree(sbuf_aa);

1506: #if defined(PETSC_USE_CTABLE)
1507:   for (i=0; i<ismax; i++) {PetscTableDestroy((PetscTable*)&rmap[i]);}
1508: #else
1509:   if (ismax) {PetscFree(rmap[0]);}
1510: #endif
1511:   PetscFree(rmap);

1513:   for (i=0; i<ismax; i++) {
1514:     if (!allcolumns[i]) {
1515: #if defined(PETSC_USE_CTABLE)
1516:       PetscTableDestroy((PetscTable*)&cmap[i]);
1517: #else
1518:       PetscFree(cmap[i]);
1519: #endif
1520:     }
1521:   }
1522:   PetscFree(cmap);
1523:   if (ismax) {PetscFree(lens[0]);}
1524:   PetscFree(lens);

1526:   for (i=0; i<ismax; i++) {
1527:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1528:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1529:   }
1530:   return(0);
1531: }
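/*
   Communication protocol used above (summary): tag0 carries the requested row numbers,
   tag1 returns the per-row nonzero counts so the requesters can size their buffers,
   tag2 carries the column indices and tag3 the numerical values; lens[][] is assembled
   from the local rows plus the tag1/tag2 replies before the submatrices are preallocated
   and filled.
*/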

1533: /*
1534:  Observe that the Seq matrices used to construct this MPI matrix are not increfed.
1535:  Be careful not to destroy them elsewhere.
1536:  */
1539: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1540: {
1541:   /* If making this function public, change the error returned in this function away from _PLIB. */
1543:   Mat_MPIAIJ     *aij;
1544:   PetscBool      seqaij;

1547:   /* Check to make sure the component matrices are compatible with C. */
1548:   PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1549:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1550:   PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1551:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1552:   if (A->rmap->n != B->rmap->n || A->rmap->bs != B->rmap->bs || A->cmap->bs != B->cmap->bs) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");

1554:   MatCreate(comm, C);
1555:   MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1556:   MatSetBlockSizes(*C,A->rmap->bs, A->cmap->bs);
1557:   PetscLayoutSetUp((*C)->rmap);
1558:   PetscLayoutSetUp((*C)->cmap);
1559:   if ((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1560:   MatSetType(*C, MATMPIAIJ);
1561:   aij    = (Mat_MPIAIJ*)((*C)->data);
1562:   aij->A = A;
1563:   aij->B = B;
1564:   PetscLogObjectParent(*C,A);
1565:   PetscLogObjectParent(*C,B);

1567:   (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1568:   (*C)->assembled    = (PetscBool)(A->assembled && B->assembled);
1569:   return(0);
1570: }

1574: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1575: {
1576:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);

1581:   *A = aij->A;
1582:   *B = aij->B;
1583:   /* Note that we don't incref *A and *B, so be careful! */
1584:   return(0);
1585: }

1589: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1590:                                                  PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1591:                                                  PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1592:                                                  PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1593: {
1595:   PetscMPIInt    size, flag;
1596:   PetscInt       i,ii;
1597:   PetscInt       ismax_c;

1600:   if (!ismax) return(0);

1602:   for (i = 0, ismax_c = 0; i < ismax; ++i) {
1603:     MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1604:     if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1605:     MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1606:     if (size > 1) ++ismax_c;
1607:   }
1608:   if (!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1609:     (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1610:   } else { /* if (ismax_c) */
1611:     Mat         *A,*B;
1612:     IS          *isrow_c, *iscol_c;
1613:     PetscMPIInt size;
1614:     /*
1615:      Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
1616:      array of sequential matrices underlying the resulting parallel matrices.
1617:      Which arrays to allocate is based on the value of MatReuse scall.
1618:      There are as many diag matrices as there are original index sets.
1619:      There are only as many parallel and off-diag matrices, as there are parallel (comm size > 1) index sets.

1621:      Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists,
1622:      we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1623:      getsubmats_seq will deposit the extracted diag and off-diag parts.
1624:      However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1625:      If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1626:     */

1628:     /* Parallel matrix array is allocated only if no reuse is taking place. */
1629:     if (scall != MAT_REUSE_MATRIX) {
1630:       PetscMalloc((ismax)*sizeof(Mat),submat);
1631:     } else {
1632:       PetscMalloc(ismax*sizeof(Mat), &A);
1633:       PetscMalloc(ismax_c*sizeof(Mat), &B);
1634:       /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1635:       for (i = 0, ii = 0; i < ismax; ++i) {
1636:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1637:         if (size > 1) {
1638:           (*extractseq)((*submat)[i],A+i,B+ii);
1639:           ++ii;
1640:         } else A[i] = (*submat)[i];
1641:       }
1642:     }
1643:     /*
1644:      Construct the complements of the iscol ISs for parallel ISs only.
1645:      These are used to extract the off-diag portion of the resulting parallel matrix.
1646:      The row IS for the off-diag portion is the same as for the diag portion,
1647:      so we merely alias the row IS, while skipping those that are sequential.
1648:     */
1649:     PetscMalloc2(ismax_c,IS,&isrow_c, ismax_c, IS, &iscol_c);
1650:     for (i = 0, ii = 0; i < ismax; ++i) {
1651:       MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1652:       if (size > 1) {
1653:         isrow_c[ii] = isrow[i];

1655:         ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1656:         ++ii;
1657:       }
1658:     }
1659:     /* Now obtain the sequential A and B submatrices separately. */
1660:     (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1661:     (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1662:     for (ii = 0; ii < ismax_c; ++ii) {
1663:       ISDestroy(&iscol_c[ii]);
1664:     }
1665:     PetscFree2(isrow_c, iscol_c);
1666:     /*
1667:      If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1668:      have been extracted directly into the parallel matrices containing them, or
1669:      simply into the sequential matrix identical with the corresponding A (if size == 1).
1670:      Otherwise, make sure that parallel matrices are constructed from A & B, or the
1671:      A is put into the correct submat slot (if size == 1).
1672:      */
1673:     if (scall != MAT_REUSE_MATRIX) {
1674:       for (i = 0, ii = 0; i < ismax; ++i) {
1675:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1676:         if (size > 1) {
1677:           /*
1678:            For each parallel isrow[i], create parallel matrices from the extracted sequential matrices.
1679:            */
1680:           /* Construct submat[i] from the Seq pieces A and B. */
1681:           (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);

1683:           ++ii;
1684:         } else (*submat)[i] = A[i];
1685:       }
1686:     }
1687:     PetscFree(A);
1688:     PetscFree(B);
1689:   }
1690:   return(0);
1691: } /* MatGetSubMatricesParallel_MPIXAIJ() */
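/*
   In outline: for each parallel index set the diagonal part is extracted with iscol[i] and
   the off-diagonal part with the complement index set obtained from ISGetNonlocalIS(); the
   two sequential pieces are then combined into an MPIAIJ matrix by makefromseq()
   (MatCreateMPIAIJFromSeqMatrices_Private in the AIJ case).
*/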



1697: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1698: {

1702:   MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);
1703:   return(0);
1704: }