/* Actual source code: vecstash.c (petsc-3.4.2, 2013-07-02) */

#include <petsc-private/vecimpl.h>

#define DEFAULT_STASH_SIZE   100
/*
  VecStashCreate_Private - Creates a stash, currently used by all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processes are kept until assembly is done.

  This is a simple-minded stash; it simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash    - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt       max,*opt,nopt;
  PetscBool      flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = 0;
  stash->array    = 0;

  stash->send_waits   = 0;
  stash->recv_waits   = 0;
  stash->send_status  = 0;
  stash->nsends       = 0;
  stash->nrecvs       = 0;
  stash->svalues      = 0;
  stash->rvalues      = 0;
  stash->rmax         = 0;
  stash->nprocs       = 0;
  stash->nprocessed   = 0;
  stash->donotstash   = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  return(0);
}
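
/* A usage sketch for the -vecstash_initial_size option handled above (the program
   name ./app is hypothetical): PetscOptionsGetIntArray() accepts a comma-separated
   list, so a run such as

       mpiexec -n 2 ./app -vecstash_initial_size 500,1000

   sets umax to 500 on rank 0 and 1000 on rank 1, while a single value is applied
   on every rank; if fewer values than ranks are given, ranks beyond the list keep
   the default stash size. */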

/*
   VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFree2(stash->array,stash->idx);
  PetscFree(stash->bowners);
  return(0);
}

/*
   VecStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt       nsends=stash->nsends,oldnmax;
  MPI_Status     *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used, so
     that the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  PetscFree2(stash->array,stash->idx);
  stash->array = 0;
  stash->idx   = 0;
  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree2(stash->rvalues,stash->rindices);
  PetscFree(stash->nprocs);
  return(0);
}
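
/* Worked example of the heuristic above (illustrative numbers): if 1000 entries
   were stashed (stash->n = 1000) with bs = 1, then
       oldnmax = ((PetscInt)(1000 * 1.1) + 5) * 1 = 1105,
   so the next assembly starts with room for 1105 values instead of growing from
   the default size again. */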

/*
   VecStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash (number of stashed values)
   reallocs - the number of additional mallocs incurred.

*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  if (nstash) *nstash = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}


/*
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value that is used as the max size of the stash.
            This value is used while allocating memory. It specifies
            the number of values stored, even with the block-stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}
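
/* Illustrative example: because umax counts individual values rather than blocks,
   calling VecStashSetInitialSize_Private(stash,100) on a stash with bs = 2 reserves
   room for 100 scalar values, i.e. 50 blocks, when the stash is next expanded
   (see the umax/bs division in VecStashExpand_Private() below). */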

/* VecStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt       *n_idx,newnmax,bs=stash->bs;
  PetscScalar    *n_array;

  /* allocate a larger stash. */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs;
    else                              newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else                              newnmax = stash->nmax*2;

  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc2(bs*newnmax,PetscScalar,&n_array,newnmax,PetscInt,&n_idx);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscFree2(stash->array,stash->idx);

  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}
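
/* Illustrative growth sequence (assuming bs = 1, no -vecstash_initial_size, and a
   freshly created stash): the first expansion sizes the stash to
   DEFAULT_STASH_SIZE = 100 entries, and subsequent expansions double it to
   200, 400, 800, ...; reallocs counts these expansions (it starts at -1, so the
   initial allocation is not reported as a reallocation by VecStashGetInfo_Private()). */
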
/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owning
  processes.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash the 'owners' array gives the
  ownership ranges in blocked global indices; for the regular stash it
  gives them in the usual global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt    size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt       *owner,*start,*nprocs,nsends,nreceives;
  PetscInt       nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar    *rvalues,*svalues;
  MPI_Comm       comm = stash->comm;
  MPI_Request    *send_waits,*recv_waits;

  /*  first count number of contributors to each processor */
  PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscInt));
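  /* nprocs is laid out as pairs: nprocs[2*j] counts the stashed entries destined
     for rank j, while nprocs[2*j+1] is set to 1 below if at least one message will
     be sent to rank j; both halves of the pair feed PetscMaxSum() further down. */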
  PetscMalloc(stash->n*sizeof(PetscInt),&owner);

  j       = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc2(nreceives*nmax*bs,PetscScalar,&rvalues,nreceives*nmax,PetscInt,&rindices);
  PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
      1) start[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc2(stash->n*bs,PetscScalar,&svalues,stash->n,PetscInt,&sindices);
  PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);
  PetscMalloc(size*sizeof(PetscInt),&start);
  /* use 2 sends: the first with all_v, the next with all_i */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
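  /* The packing loop above advanced start[j] past each destination's block, so the
     prefix sums are rebuilt here to point at the beginning of each block again
     before posting the sends. */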
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* This memory is reused in VecStashScatterGetMesg_Private() (as flg_v), where -1
     marks a source whose message has not yet been received */
  for (i=0; i<2*size; i++) nprocs[i] = -1;

  stash->nprocs     = nprocs;
  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}

/*
   VecStashScatterGetMesg_Private - This function waits on the receives posted
   in the function VecStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals, rows, vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt    i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt       *flg_v;
  PetscInt       i1,i2,bs=stash->bs;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return(0);

  flg_v = stash->nprocs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
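    /* recv_waits was posted in pairs in VecStashScatterBegin_Private(): even request
       indices are the value receives (tag1) and odd ones the index receives (tag2),
       so i/2 identifies the receive slot and i%2 tells which half arrived. */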
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices + i2*stash->rmax;
      *vals = stash->rvalues + i1*bs*stash->rmax;
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
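
/* A minimal sketch of how a caller typically drives these routines during vector
   assembly (the names vec, owners, nvals, rows, vals, flg and the local insertion
   step are illustrative, not taken from this file):

     VecStashScatterBegin_Private(&vec->stash,owners);
     while (1) {
       VecStashScatterGetMesg_Private(&vec->stash,&nvals,&rows,&vals,&flg);
       if (!flg) break;
       // insert the nvals received (rows[i], vals[i]) pairs into the local part of vec
     }
     VecStashScatterEnd_Private(&vec->stash);
*/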