Actual source code: vecstash.c

petsc-3.12.0 2019-09-29

  2:  #include <petsc/private/vecimpl.h>

  4: #define DEFAULT_STASH_SIZE   100

  6: /*
  7:   VecStashCreate_Private - Creates a stash, currently used for all the parallel
  8:   vector implementations. The stash is where elements of a vector destined
  9:   to be stored on other processors are kept until vector assembly is done.

 11:   This is a simple-minded stash; it simply adds entries to the end of the stash.

 13:   Input Parameters:
 14:   comm - communicator, required for scatters.
 15:   bs   - stash block size, used when stashing blocks of values

 17:   Output Parameters:
 18:   stash    - the newly created stash
 19: */
 20: PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
 21: {
 23:   PetscInt       max,*opt,nopt;
 24:   PetscBool      flg;

 27:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 28:   stash->comm = comm;
 29:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 30:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 31:   MPI_Comm_size(stash->comm,&stash->size);
 32:   MPI_Comm_rank(stash->comm,&stash->rank);

 34:   nopt = stash->size;
 35:   PetscMalloc1(nopt,&opt);
 36:   PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);
 37:   if (flg) {
 38:     if (nopt == 1)                max = opt[0];
 39:     else if (nopt == stash->size) max = opt[stash->rank];
 40:     else if (stash->rank < nopt)  max = opt[stash->rank];
 41:     else                          max = 0; /* use default */
 42:     stash->umax = max;
 43:   } else {
 44:     stash->umax = 0;
 45:   }
 46:   PetscFree(opt);

 48:   if (bs <= 0) bs = 1;

 50:   stash->bs       = bs;
 51:   stash->nmax     = 0;
 52:   stash->oldnmax  = 0;
 53:   stash->n        = 0;
 54:   stash->reallocs = -1;
 55:   stash->idx      = 0;
 56:   stash->array    = 0;

 58:   stash->send_waits   = 0;
 59:   stash->recv_waits   = 0;
 60:   stash->send_status  = 0;
 61:   stash->nsends       = 0;
 62:   stash->nrecvs       = 0;
 63:   stash->svalues      = 0;
 64:   stash->rvalues      = 0;
 65:   stash->rmax         = 0;
 66:   stash->nprocs       = 0;
 67:   stash->nprocessed   = 0;
 68:   stash->donotstash   = PETSC_FALSE;
 69:   stash->ignorenegidx = PETSC_FALSE;
 70:   return(0);
 71: }
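/* Illustrative usage sketch (not part of vecstash.c): how a caller might create a
   scalar (bs = 1) stash, and how the -vecstash_initial_size option read above maps
   onto ranks. The variable names are hypothetical.

     VecStash stash;
     VecStashCreate_Private(PETSC_COMM_WORLD,1,&stash);
     // -vecstash_initial_size 500        -> every rank pre-sizes its stash to 500 values
     // -vecstash_initial_size 500,0,200  -> per-rank sizes on 3 ranks; a 0 entry keeps the default
*/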

 73: /*
 74:    VecStashDestroy_Private - Destroy the stash
 75: */
 76: PetscErrorCode VecStashDestroy_Private(VecStash *stash)
 77: {

 81:   PetscFree2(stash->array,stash->idx);
 82:   PetscFree(stash->bowners);
 83:   return(0);
 84: }

 86: /*
 87:    VecStashScatterEnd_Private - This is called as the final stage of
 88:    scatter. The final stages of message passing are done here, and
 89:    all the memory used for message passing is cleaned up. This
 90:    routine also resets the stash, and deallocates the memory used
 91:    for the stash. It also keeps track of the current memory usage
 92:    so that the same value can be used the next time through.
 93: */
 94: PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
 95: {
 97:   PetscInt       nsends=stash->nsends,oldnmax;
 98:   MPI_Status     *send_status;

101:   /* wait on sends */
102:   if (nsends) {
103:     PetscMalloc1(2*nsends,&send_status);
104:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
105:     PetscFree(send_status);
106:   }

108:   /* Now update oldnmax to be approximately 10% more than the maximum n; this way the
109:      wastage of space is reduced the next time this stash is used.
110:      Only update oldnmax if it increases */
111:   if (stash->n) {
112:     oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
113:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
114:   }

116:   stash->nmax       = 0;
117:   stash->n          = 0;
118:   stash->reallocs   = -1;
119:   stash->rmax       = 0;
120:   stash->nprocessed = 0;

122:   PetscFree2(stash->array,stash->idx);
123:   stash->array = 0;
124:   stash->idx   = 0;
125:   PetscFree(stash->send_waits);
126:   PetscFree(stash->recv_waits);
127:   PetscFree2(stash->svalues,stash->sindices);
128:   PetscFree2(stash->rvalues,stash->rindices);
129:   PetscFree(stash->nprocs);
130:   return(0);
131: }
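/* Worked example of the resizing heuristic above (numbers are illustrative): with
   bs = 1 and stash->n = 100 entries used in this round, the suggested size for the
   next round is oldnmax = ((PetscInt)(100*1.1) + 5)*1 = 115, and stash->oldnmax is
   raised to 115 only if that exceeds its previous value. */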

133: /*
134:    VecStashGetInfo_Private - Gets the relevant statistics of the stash

136:    Input Parameter:
137:    stash    - the stash

        Output Parameters:
138:    nstash   - the size of the stash
139:    reallocs - the number of additional mallocs incurred.

141: */
142: PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
143: {
145:   if (nstash) *nstash = stash->n*stash->bs;
146:   if (reallocs) {
147:     if (stash->reallocs < 0) *reallocs = 0;
148:     else                     *reallocs = stash->reallocs;
149:   }
150:   return(0);
151: }


154: /*
155:    VecStashSetInitialSize_Private - Sets the initial size of the stash

157:    Input Parameters:
158:    stash  - the stash
159:    max    - the value that is used as the max size of the stash.
160:             This value is used while allocating memory. It specifies
161:             the number of values stored, even with the block-stash
162: */
163: PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
164: {
166:   stash->umax = max;
167:   return(0);
168: }

170: /* VecStashExpand_Private - Expand the stash. This function is called
171:    when the space in the stash is not sufficient to add the new values
172:    being inserted into the stash.

174:    Input Parameters:
175:    stash - the stash
176:    incr  - the minimum increase requested

178:    Notes:
179:    This routine doubles the currently used memory.
180: */
181: PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
182: {
184:   PetscInt       *n_idx,newnmax,bs=stash->bs;
185:   PetscScalar    *n_array;

188:   /* allocate a larger stash. */
189:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
190:     if (stash->umax)                  newnmax = stash->umax/bs;
191:     else                              newnmax = DEFAULT_STASH_SIZE/bs;
192:   } else if (!stash->nmax) { /* reusing stash */
193:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
194:     else                              newnmax = stash->oldnmax/bs;
195:   } else                              newnmax = stash->nmax*2;

197:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

199:   PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);
200:   PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
201:   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
202:   PetscFree2(stash->array,stash->idx);

204:   stash->array = n_array;
205:   stash->idx   = n_idx;
206:   stash->nmax  = newnmax;
207:   stash->reallocs++;
208:   return(0);
209: }
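/* The sizing rule above, restated as a standalone helper for clarity. This is a
   sketch, not part of vecstash.c, and the function name is hypothetical; it only
   mirrors the branches in VecStashExpand_Private().

     static PetscInt VecStashNewCapacity(PetscInt nmax,PetscInt oldnmax,PetscInt umax,PetscInt bs,PetscInt incr)
     {
       PetscInt newnmax;
       if (!oldnmax && !nmax) newnmax = (umax ? umax : DEFAULT_STASH_SIZE)/bs; // brand-new stash
       else if (!nmax)        newnmax = (umax > oldnmax ? umax : oldnmax)/bs;  // reusing an earlier stash
       else                   newnmax = nmax*2;                                // double the used memory
       if (newnmax < nmax + incr) newnmax += 2*incr;                           // make sure the request fits
       return newnmax;
     }
*/
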
210: /*
211:   VecStashScatterBegin_Private - Initiates the transfer of values to the
212:   correct owners. This function goes through the stash, checks the
213:   owner of each stashed value, and sends the values off to the owner
214:   processors.

216:   Input Parameters:
217:   stash  - the stash
218:   owners - an array of size 'no-of-procs' which gives the ownership range
219:            for each node.

221:   Notes:
222:     In the case of the blocked stash, the 'owners' array has its
223:   ranges specified in blocked global indices; for the regular stash the
224:   ranges are in regular global indices.
225: */
226: PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
227: {
229:   PetscMPIInt    size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
230:   PetscInt       *owner,*start,*nprocs,nsends,nreceives;
231:   PetscInt       nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
232:   PetscScalar    *rvalues,*svalues;
233:   MPI_Comm       comm = stash->comm;
234:   MPI_Request    *send_waits,*recv_waits;

237:   /*  first count number of contributors to each processor */
238:   PetscCalloc1(2*size,&nprocs);
239:   PetscMalloc1(stash->n,&owner);

241:   j       = 0;
242:   lastidx = -1;
243:   for (i=0; i<stash->n; i++) {
244:     /* if indices are NOT locally sorted, need to start search at the beginning */
245:     if (lastidx > (idx = stash->idx[i])) j = 0;
246:     lastidx = idx;
247:     for (; j<size; j++) {
248:       if (idx >= owners[j] && idx < owners[j+1]) {
249:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
250:       }
251:     }
252:   }
253:   nsends = 0;
254:   for (i=0; i<size; i++) nsends += nprocs[2*i+1];

256:   /* inform other processors of number of messages and max length*/
257:   PetscMaxSum(comm,nprocs,&nmax,&nreceives);

259:   /* post receives:
260:      since we don't know how long each individual message is we
261:      allocate the largest needed buffer for each receive. Potentially
262:      this is a lot of wasted space.
263:   */
264:   PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);
265:   PetscMalloc1(2*nreceives,&recv_waits);
266:   for (i=0,count=0; i<nreceives; i++) {
267:     MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
268:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
269:   }

271:   /* do sends:
272:       1) start[i] gives the starting index in svalues for stuff going to
273:          the ith processor
274:   */
275:   PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);
276:   PetscMalloc1(2*nsends,&send_waits);
277:   PetscMalloc1(size,&start);
278:   /* use 2 sends the first with all_v, the next with all_i */
279:   start[0] = 0;
280:   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

282:   for (i=0; i<stash->n; i++) {
283:     j = owner[i];
284:     if (bs == 1) svalues[start[j]] = stash->array[i];
285:     else {
286:       PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
287:     }
288:     sindices[start[j]] = stash->idx[i];
289:     start[j]++;
290:   }
291:   start[0] = 0;
292:   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

294:   for (i=0,count=0; i<size; i++) {
295:     if (nprocs[2*i+1]) {
296:       MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
297:       MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
298:     }
299:   }
300:   PetscFree(owner);
301:   PetscFree(start);
302:   /* This memory is reused in scatter end for a different purpose */
303:   for (i=0; i<2*size; i++) nprocs[i] = -1;

305:   stash->nprocs     = nprocs;
306:   stash->svalues    = svalues;
307:   stash->sindices   = sindices;
308:   stash->rvalues    = rvalues;
309:   stash->rindices   = rindices;
310:   stash->nsends     = nsends;
311:   stash->nrecvs     = nreceives;
312:   stash->send_waits = send_waits;
313:   stash->recv_waits = recv_waits;
314:   stash->rmax       = nmax;
315:   return(0);
316: }
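/* Example of the 'owners' argument (illustrative numbers): with three ranks owning
   4, 3 and 5 (block) rows respectively, the caller would pass

     PetscInt owners[] = {0, 4, 7, 12};

   so a stashed index idx belongs to rank j exactly when owners[j] <= idx < owners[j+1],
   which is the test used in the search loop above. */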

318: /*
319:    VecStashScatterGetMesg_Private - This function waits on the receives posted
320:    in the function VecStashScatterBegin_Private() and returns one message at
321:    a time to the calling function. If no messages are left, it indicates this
322:    by setting flg = 0, else it sets flg = 1.

324:    Input Parameters:
325:    stash - the stash

327:    Output Parameters:
328:    nvals - the number of entries in the current message.
329:    rows  - an array of indices (or blocked indices) corresponding to the values
331:    vals  - the values
332:    flg   - 0 indicates no more messages left, and the current call has no values associated.
333:            1 indicates that the current call successfully received a message, and the
334:              other output parameters nvals,rows,vals are set appropriately.
335: */
336: PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
337: {
339:   PetscMPIInt    i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
340:   PetscInt       *flg_v;
341:   PetscInt       i1,i2,bs=stash->bs;
342:   MPI_Status     recv_status;
343:   PetscBool      match_found = PETSC_FALSE;

346:   *flg = 0; /* When a message is discovered this is reset to 1 */
347:   /* Return if no more messages to process */
348:   if (stash->nprocessed == stash->nrecvs) return(0);

350:   flg_v = stash->nprocs;
351:   /* If a matching pair of receives are found, process them, and return the data to
352:      the calling function. Until then keep receiving messages */
353:   while (!match_found) {
354:     MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
355:     /* Now pack the received message into a structure which is usable by others */
356:     if (i % 2) {
357:       MPI_Get_count(&recv_status,MPIU_INT,nvals);
358:       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
359:     } else {
360:       MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
361:       flg_v[2*recv_status.MPI_SOURCE] = i/2;
362:       *nvals = *nvals/bs;
363:     }

365:     /* Check if we have both the messages from this proc */
366:     i1 = flg_v[2*recv_status.MPI_SOURCE];
367:     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
368:     if (i1 != -1 && i2 != -1) {
369:       *rows = stash->rindices + i2*stash->rmax;
370:       *vals = stash->rvalues + i1*bs*stash->rmax;
371:       *flg  = 1;
372:       stash->nprocessed++;
373:       match_found = PETSC_TRUE;
374:     }
375:   }
376:   return(0);
377: }
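/* Sketch of how the scatter routines are typically driven at assembly time. This is a
   hedged outline, not code copied from PETSc; 'stash' and 'owners' are assumed to be
   set up by the caller, and unpacking into the local vector is elided.

     PetscMPIInt nvals;
     PetscInt    *rows,flg;
     PetscScalar *vals;

     VecStashScatterBegin_Private(&stash,owners);
     VecStashScatterGetMesg_Private(&stash,&nvals,&rows,&vals,&flg);
     while (flg) {
       // ... insert or add the nvals (rows[i],vals[i]) pairs into the local part of the vector ...
       VecStashScatterGetMesg_Private(&stash,&nvals,&rows,&vals,&flg);
     }
     VecStashScatterEnd_Private(&stash);
*/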

379: /*
380:  * Sort the stash, removing duplicates (combining as appropriate).
381:  */
382: PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
383: {
385:   PetscInt i,j,bs = stash->bs;

388:   if (!stash->n) return(0);
389:   if (bs == 1) {
390:     PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);
391:     for (i=1,j=0; i<stash->n; i++) {
392:       if (stash->idx[i] == stash->idx[j]) {
393:         switch (stash->insertmode) {
394:         case ADD_VALUES:
395:           stash->array[j] += stash->array[i];
396:           break;
397:         case INSERT_VALUES:
398:           stash->array[j] = stash->array[i];
399:           break;
400:         default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
401:         }
402:       } else {
403:         j++;
404:         stash->idx[j]   = stash->idx[i];
405:         stash->array[j] = stash->array[i];
406:       }
407:     }
408:     stash->n = j + 1;
409:   } else {                      /* block stash */
410:     PetscInt *perm = NULL;
411:     PetscScalar *arr;
412:     PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);
413:     for (i=0; i<stash->n; i++) perm[i] = i;
414:     PetscSortIntWithArray(stash->n,stash->idx,perm);

416:     /* Out-of-place copy of arr */
417:     PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));
418:     for (i=1,j=0; i<stash->n; i++) {
419:       PetscInt k;
420:       if (stash->idx[i] == stash->idx[j]) {
421:         switch (stash->insertmode) {
422:         case ADD_VALUES:
423:           for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
424:           break;
425:         case INSERT_VALUES:
426:           for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
427:           break;
428:         default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
429:         }
430:       } else {
431:         j++;
432:         stash->idx[j] = stash->idx[i];
433:         for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
434:       }
435:     }
436:     stash->n = j + 1;
437:     PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));
438:     PetscFree2(perm,arr);
439:   }
440:   return(0);
441: }
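/* Behavioural example for bs = 1 (values are illustrative): if the stash holds
   idx = {7, 2, 7} with array = {1.0, 4.0, 3.0}, sorting gives idx = {2, 7, 7} and the
   duplicate entries for index 7 are then combined, leaving

     ADD_VALUES    ->  idx = {2, 7}, array = {4.0, 4.0}
     INSERT_VALUES ->  idx = {2, 7}, keeping whichever duplicate value sorts last for index 7
*/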

443: PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
444: {
446:   PetscInt i,bs = stash->bs,r;
447:   PetscSegBuffer seg;

450:   if (bs != 1 && bs != map->bs) SETERRQ2(map->comm,PETSC_ERR_PLIB,"Stash block size %D does not match layout block size %D",bs,map->bs);
451:   PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);
452:   *nowners = 0;
453:   for (i=0,r=-1; i<stash->n; i++) {
454:     if (stash->idx[i]*bs >= map->range[r+1]) {
455:       PetscMPIInt *rank;
456:       PetscSegBufferGet(seg,1,&rank);
457:       PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);
458:       *rank = r;
459:       (*nowners)++;
460:     }
461:   }
462:   PetscSegBufferExtractAlloc(seg,owners);
463:   PetscSegBufferDestroy(&seg);
464:   return(0);
465: }
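/* Illustrative example (values hypothetical): after VecStashSortCompress_Private(), a
   stash with bs = 1 and idx = {1, 2, 9} on a layout whose ownership ranges are
   {0, 4, 8, 12} yields nowners = 2 and owners = {0, 2}, since indices 1 and 2 live on
   rank 0 and index 9 lives on rank 2. */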