2: #include <petsc/private/vecimpl.h>
4: #define DEFAULT_STASH_SIZE 100 6: /*
7: VecStashCreate_Private - Creates a stash,currently used for all the parallel
8: matrix implementations. The stash is where elements of a matrix destined
9: to be stored on other processors are kept until matrix assembly is done.
11: This is a simple minded stash. Simply adds entries to end of stash.
13: Input Parameters:
14: comm - communicator, required for scatters.
15: bs - stash block size. used when stashing blocks of values
17: Output Parameters:
18: stash - the newly created stash
19: */
22: PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash) 23: {
25: PetscInt max,*opt,nopt;
26: PetscBool flg;
29: /* Require 2 tags, get the second using PetscCommGetNewTag() */
30: stash->comm = comm;
31: PetscCommGetNewTag(stash->comm,&stash->tag1);
32: PetscCommGetNewTag(stash->comm,&stash->tag2);
33: MPI_Comm_size(stash->comm,&stash->size);
34: MPI_Comm_rank(stash->comm,&stash->rank);
36: nopt = stash->size;
37: PetscMalloc1(nopt,&opt);
38: PetscOptionsGetIntArray(NULL,"-vecstash_initial_size",opt,&nopt,&flg);
39: if (flg) {
40: if (nopt == 1) max = opt[0];
41: else if (nopt == stash->size) max = opt[stash->rank];
42: else if (stash->rank < nopt) max = opt[stash->rank];
43: else max = 0; /* use default */
44: stash->umax = max;
45: } else {
46: stash->umax = 0;
47: }
48: PetscFree(opt);
50: if (bs <= 0) bs = 1;
52: stash->bs = bs;
53: stash->nmax = 0;
54: stash->oldnmax = 0;
55: stash->n = 0;
56: stash->reallocs = -1;
57: stash->idx = 0;
58: stash->array = 0;
60: stash->send_waits = 0;
61: stash->recv_waits = 0;
62: stash->send_status = 0;
63: stash->nsends = 0;
64: stash->nrecvs = 0;
65: stash->svalues = 0;
66: stash->rvalues = 0;
67: stash->rmax = 0;
68: stash->nprocs = 0;
69: stash->nprocessed = 0;
70: stash->donotstash = PETSC_FALSE;
71: stash->ignorenegidx = PETSC_FALSE;
72: return(0);
73: }
75: /*
76: VecStashDestroy_Private - Destroy the stash
77: */
80: PetscErrorCode VecStashDestroy_Private(VecStash *stash) 81: {
85: PetscFree2(stash->array,stash->idx);
86: PetscFree(stash->bowners);
87: return(0);
88: }
90: /*
91: VecStashScatterEnd_Private - This is called as the fial stage of
92: scatter. The final stages of message passing is done here, and
93: all the memory used for message passing is cleanedu up. This
94: routine also resets the stash, and deallocates the memory used
95: for the stash. It also keeps track of the current memory usage
96: so that the same value can be used the next time through.
97: */
100: PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)101: {
103: PetscInt nsends=stash->nsends,oldnmax;
104: MPI_Status *send_status;
107: /* wait on sends */
108: if (nsends) {
109: PetscMalloc1(2*nsends,&send_status);
110: MPI_Waitall(2*nsends,stash->send_waits,send_status);
111: PetscFree(send_status);
112: }
114: /* Now update nmaxold to be app 10% more than max n, this way the
115: wastage of space is reduced the next time this stash is used.
116: Also update the oldmax, only if it increases */
117: if (stash->n) {
118: oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
119: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
120: }
122: stash->nmax = 0;
123: stash->n = 0;
124: stash->reallocs = -1;
125: stash->rmax = 0;
126: stash->nprocessed = 0;
128: PetscFree2(stash->array,stash->idx);
129: stash->array = 0;
130: stash->idx = 0;
131: PetscFree(stash->send_waits);
132: PetscFree(stash->recv_waits);
133: PetscFree2(stash->svalues,stash->sindices);
134: PetscFree2(stash->rvalues,stash->rindices);
135: PetscFree(stash->nprocs);
136: return(0);
137: }
139: /*
140: VecStashGetInfo_Private - Gets the relavant statistics of the stash
142: Input Parameters:
143: stash - the stash
144: nstash - the size of the stash
145: reallocs - the number of additional mallocs incurred.
147: */
150: PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)151: {
153: if (nstash) *nstash = stash->n*stash->bs;
154: if (reallocs) {
155: if (stash->reallocs < 0) *reallocs = 0;
156: else *reallocs = stash->reallocs;
157: }
158: return(0);
159: }
162: /*
163: VecStashSetInitialSize_Private - Sets the initial size of the stash
165: Input Parameters:
166: stash - the stash
167: max - the value that is used as the max size of the stash.
168: this value is used while allocating memory. It specifies
169: the number of vals stored, even with the block-stash
170: */
173: PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)174: {
176: stash->umax = max;
177: return(0);
178: }
180: /* VecStashExpand_Private - Expand the stash. This function is called
181: when the space in the stash is not sufficient to add the new values
182: being inserted into the stash.
184: Input Parameters:
185: stash - the stash
186: incr - the minimum increase requested
188: Notes:
189: This routine doubles the currently used memory.
190: */
193: PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)194: {
196: PetscInt *n_idx,newnmax,bs=stash->bs;
197: PetscScalar *n_array;
200: /* allocate a larger stash. */
201: if (!stash->oldnmax && !stash->nmax) { /* new stash */
202: if (stash->umax) newnmax = stash->umax/bs;
203: else newnmax = DEFAULT_STASH_SIZE/bs;
204: } else if (!stash->nmax) { /* resuing stash */
205: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
206: else newnmax = stash->oldnmax/bs;
207: } else newnmax = stash->nmax*2;
209: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
211: PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);
212: PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
213: PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
214: PetscFree2(stash->array,stash->idx);
216: stash->array = n_array;
217: stash->idx = n_idx;
218: stash->nmax = newnmax;
219: stash->reallocs++;
220: return(0);
221: }
222: /*
223: VecStashScatterBegin_Private - Initiates the transfer of values to the
224: correct owners. This function goes through the stash, and check the
225: owners of each stashed value, and sends the values off to the owner
226: processors.
228: Input Parameters:
229: stash - the stash
230: owners - an array of size 'no-of-procs' which gives the ownership range
231: for each node.
233: Notes: The 'owners' array in the cased of the blocked-stash has the
234: ranges specified blocked global indices, and for the regular stash in
235: the proper global indices.
236: */
239: PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)240: {
242: PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
243: PetscInt *owner,*start,*nprocs,nsends,nreceives;
244: PetscInt nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
245: PetscScalar *rvalues,*svalues;
246: MPI_Comm comm = stash->comm;
247: MPI_Request *send_waits,*recv_waits;
250: /* first count number of contributors to each processor */
251: PetscCalloc1(2*size,&nprocs);
252: PetscMalloc1(stash->n,&owner);
254: j = 0;
255: lastidx = -1;
256: for (i=0; i<stash->n; i++) {
257: /* if indices are NOT locally sorted, need to start search at the beginning */
258: if (lastidx > (idx = stash->idx[i])) j = 0;
259: lastidx = idx;
260: for (; j<size; j++) {
261: if (idx >= owners[j] && idx < owners[j+1]) {
262: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
263: }
264: }
265: }
266: nsends = 0;
267: for (i=0; i<size; i++) nsends += nprocs[2*i+1];
269: /* inform other processors of number of messages and max length*/
270: PetscMaxSum(comm,nprocs,&nmax,&nreceives);
272: /* post receives:
273: since we don't know how long each individual message is we
274: allocate the largest needed buffer for each receive. Potentially
275: this is a lot of wasted space.
276: */
277: PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);
278: PetscMalloc1(2*nreceives,&recv_waits);
279: for (i=0,count=0; i<nreceives; i++) {
280: MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
281: MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
282: }
284: /* do sends:
285: 1) starts[i] gives the starting index in svalues for stuff going to
286: the ith processor
287: */
288: PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);
289: PetscMalloc1(2*nsends,&send_waits);
290: PetscMalloc1(size,&start);
291: /* use 2 sends the first with all_v, the next with all_i */
292: start[0] = 0;
293: for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
295: for (i=0; i<stash->n; i++) {
296: j = owner[i];
297: if (bs == 1) svalues[start[j]] = stash->array[i];
298: else {
299: PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
300: }
301: sindices[start[j]] = stash->idx[i];
302: start[j]++;
303: }
304: start[0] = 0;
305: for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
307: for (i=0,count=0; i<size; i++) {
308: if (nprocs[2*i+1]) {
309: MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
310: MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
311: }
312: }
313: PetscFree(owner);
314: PetscFree(start);
315: /* This memory is reused in scatter end for a different purpose*/
316: for (i=0; i<2*size; i++) nprocs[i] = -1;
318: stash->nprocs = nprocs;
319: stash->svalues = svalues;
320: stash->sindices = sindices;
321: stash->rvalues = rvalues;
322: stash->rindices = rindices;
323: stash->nsends = nsends;
324: stash->nrecvs = nreceives;
325: stash->send_waits = send_waits;
326: stash->recv_waits = recv_waits;
327: stash->rmax = nmax;
328: return(0);
329: }
331: /*
332: VecStashScatterGetMesg_Private - This function waits on the receives posted
333: in the function VecStashScatterBegin_Private() and returns one message at
334: a time to the calling function. If no messages are left, it indicates this
335: by setting flg = 0, else it sets flg = 1.
337: Input Parameters:
338: stash - the stash
340: Output Parameters:
341: nvals - the number of entries in the current message.
342: rows - an array of row indices (or blocked indices) corresponding to the values
343: cols - an array of columnindices (or blocked indices) corresponding to the values
344: vals - the values
345: flg - 0 indicates no more message left, and the current call has no values associated.
346: 1 indicates that the current call successfully received a message, and the
347: other output parameters nvals,rows,cols,vals are set appropriately.
348: */
351: PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)352: {
354: PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
355: PetscInt *flg_v;
356: PetscInt i1,i2,bs=stash->bs;
357: MPI_Status recv_status;
358: PetscBool match_found = PETSC_FALSE;
361: *flg = 0; /* When a message is discovered this is reset to 1 */
362: /* Return if no more messages to process */
363: if (stash->nprocessed == stash->nrecvs) return(0);
365: flg_v = stash->nprocs;
366: /* If a matching pair of receieves are found, process them, and return the data to
367: the calling function. Until then keep receiving messages */
368: while (!match_found) {
369: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
370: /* Now pack the received message into a structure which is useable by others */
371: if (i % 2) {
372: MPI_Get_count(&recv_status,MPIU_INT,nvals);
373: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
374: } else {
375: MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
376: flg_v[2*recv_status.MPI_SOURCE] = i/2;
377: *nvals = *nvals/bs;
378: }
380: /* Check if we have both the messages from this proc */
381: i1 = flg_v[2*recv_status.MPI_SOURCE];
382: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
383: if (i1 != -1 && i2 != -1) {
384: *rows = stash->rindices + i2*stash->rmax;
385: *vals = stash->rvalues + i1*bs*stash->rmax;
386: *flg = 1;
387: stash->nprocessed++;
388: match_found = PETSC_TRUE;
389: }
390: }
391: return(0);
392: }