Actual source code: vpscat.c (petsc-3.8.3, 2017-12-09)
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8: #include <petscsf.h>
10: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
11: {
12: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
13: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
14: PetscErrorCode ierr;
15: PetscInt i;
16: PetscMPIInt rank;
17: PetscViewerFormat format;
18: PetscBool iascii;
21: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
22: if (iascii) {
23: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
24: PetscViewerGetFormat(viewer,&format);
25: if (format == PETSC_VIEWER_ASCII_INFO) {
26: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
29: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: itmp = to->starts[to->n];
31: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: itmp = from->starts[from->n];
33: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
34: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
36: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
37: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
38: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
39: PetscViewerASCIIPrintf(viewer," Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Maximum data received %d\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
43: } else {
44: PetscViewerASCIIPushSynchronized(viewer);
45: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
46: if (to->n) {
47: for (i=0; i<to->n; i++) {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
49: }
50: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
51: for (i=0; i<to->starts[to->n]; i++) {
52: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
53: }
54: }
56: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
57: if (from->n) {
58: for (i=0; i<from->n; i++) {
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
60: }
62: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
63: for (i=0; i<from->starts[from->n]; i++) {
64: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
65: }
66: }
67: if (to->local.n) {
68: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
69: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
71: }
72: }
74: PetscViewerFlush(viewer);
75: PetscViewerASCIIPopSynchronized(viewer);
76: }
77: }
78: return(0);
79: }
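/*
Usage note (assumption, following standard PETSc option conventions): the ASCII-info
branch above is reached by viewing the scatter with the PETSC_VIEWER_ASCII_INFO format,
e.g. -vecscatter_view ::ascii_info, while a plain -vecscatter_view produces the full
synchronized per-rank listing from the else branch.
*/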
81: /* -----------------------------------------------------------------------------------*/
82: /*
83: The next routine determines what part of the local scatter is an
84: exact copy of values into their current location. We check this here and
85: then know that we need not perform that portion of the scatter when the vector is
86: scattering to itself with INSERT_VALUES.
88: This is currently not used, but it would speed up, for example, DMLocalToLocalBegin/End().
90: */
91: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
92: {
93: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
95: PetscInt *nto_slots,*nfrom_slots,j = 0;
98: for (i=0; i<n; i++) {
99: if (to_slots[i] != from_slots[i]) n_nonmatching++;
100: }
102: if (!n_nonmatching) {
103: to->nonmatching_computed = PETSC_TRUE;
104: to->n_nonmatching = from->n_nonmatching = 0;
106: PetscInfo1(scatter,"Reduced %D to 0\n", n);
107: } else if (n_nonmatching == n) {
108: to->nonmatching_computed = PETSC_FALSE;
110: PetscInfo(scatter,"All values non-matching\n");
111: } else {
112: to->nonmatching_computed = PETSC_TRUE;
113: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
115: PetscMalloc1(n_nonmatching,&nto_slots);
116: PetscMalloc1(n_nonmatching,&nfrom_slots);
118: to->slots_nonmatching = nto_slots;
119: from->slots_nonmatching = nfrom_slots;
120: for (i=0; i<n; i++) {
121: if (to_slots[i] != from_slots[i]) {
122: nto_slots[j] = to_slots[i];
123: nfrom_slots[j] = from_slots[i];
124: j++;
125: }
126: }
127: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
128: }
129: return(0);
130: }
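/*
Illustrative example (not from the source): with n = 4, to->vslots = {0,1,5,7} and
from->vslots = {0,1,3,7}, entries 0, 1 and 3 already sit in their final locations, so
n_nonmatching = 1 and the routine records slots_nonmatching = {5} and {3}; an
INSERT_VALUES scatter to itself would then only need to move that single entry.
*/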
132: /* --------------------------------------------------------------------------------------*/
134: /* -------------------------------------------------------------------------------------*/
135: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
136: {
137: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
138: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
139: PetscErrorCode ierr;
140: PetscInt i;
143: if (to->use_readyreceiver) {
144: /*
145: Since we have already posted the receives we must cancel them before freeing
146: the requests
147: */
148: for (i=0; i<from->n; i++) {
149: MPI_Cancel(from->requests+i);
150: }
151: for (i=0; i<to->n; i++) {
152: MPI_Cancel(to->rev_requests+i);
153: }
154: MPI_Waitall(from->n,from->requests,to->rstatus);
155: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
156: }
158: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
159: if (to->use_alltoallw) {
160: for (i=0; i<to->n; i++) {
161: MPI_Type_free(to->types+to->procs[i]);
162: }
163: PetscFree3(to->wcounts,to->wdispls,to->types);
164: if (!from->contiq) {
165: for (i=0; i<from->n; i++) {
166: MPI_Type_free(from->types+from->procs[i]);
167: }
168: }
169: PetscFree3(from->wcounts,from->wdispls,from->types);
170: }
171: #endif
173: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
174: if (to->use_window) {
175: MPI_Win_free(&from->window);
176: MPI_Win_free(&to->window);
177: PetscFree(from->winstarts);
178: PetscFree(to->winstarts);
179: }
180: #endif
182: if (to->use_alltoallv) {
183: PetscFree2(to->counts,to->displs);
184: PetscFree2(from->counts,from->displs);
185: }
187: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
188: /*
189: IBM's PE version of MPI has a bug where freeing these requests corrupts
190: later message passing.
191: */
192: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
193: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
194: if (to->requests) {
195: for (i=0; i<to->n; i++) {
196: MPI_Request_free(to->requests + i);
197: }
198: }
199: if (to->rev_requests) {
200: for (i=0; i<to->n; i++) {
201: MPI_Request_free(to->rev_requests + i);
202: }
203: }
204: }
205: /*
206: MPICH could not properly cancel requests, so with ready-receiver mode we
207: cannot free the requests. This may be fixed now; if not, put the following
208: code inside an if (!to->use_readyreceiver) { block.
209: */
210: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
211: if (from->requests) {
212: for (i=0; i<from->n; i++) {
213: MPI_Request_free(from->requests + i);
214: }
215: }
217: if (from->rev_requests) {
218: for (i=0; i<from->n; i++) {
219: MPI_Request_free(from->rev_requests + i);
220: }
221: }
222: }
223: #endif
225: PetscFree(to->local.vslots);
226: PetscFree(from->local.vslots);
227: PetscFree2(to->counts,to->displs);
228: PetscFree2(from->counts,from->displs);
229: PetscFree(to->local.slots_nonmatching);
230: PetscFree(from->local.slots_nonmatching);
231: PetscFree(to->rev_requests);
232: PetscFree(from->rev_requests);
233: PetscFree(to->requests);
234: PetscFree(from->requests);
235: PetscFree4(to->values,to->indices,to->starts,to->procs);
236: PetscFree2(to->sstatus,to->rstatus);
237: PetscFree4(from->values,from->indices,from->starts,from->procs);
238: PetscFree(from);
239: PetscFree(to);
240: return(0);
241: }
245: /* --------------------------------------------------------------------------------------*/
246: /*
247: Special optimization to check whether the local part of the scatter is actually
248: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead.
250: */
251: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
252: {
253: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
254: PetscInt to_start,from_start;
258: to_start = to_slots[0];
259: from_start = from_slots[0];
261: for (i=1; i<n; i++) {
262: to_start += bs;
263: from_start += bs;
264: if (to_slots[i] != to_start) return(0);
265: if (from_slots[i] != from_start) return(0);
266: }
267: to->is_copy = PETSC_TRUE;
268: to->copy_start = to_slots[0];
269: to->copy_length = bs*sizeof(PetscScalar)*n;
270: from->is_copy = PETSC_TRUE;
271: from->copy_start = from_slots[0];
272: from->copy_length = bs*sizeof(PetscScalar)*n;
273: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
274: return(0);
275: }
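/*
Illustrative example (not from the source): with bs = 2, to->vslots = {4,6,8} and
from->vslots = {10,12,14}, both index sets advance by exactly bs each step, so the local
scatter is recognized as one contiguous copy of 3*2*sizeof(PetscScalar) bytes from
offset 10 of the source to offset 4 of the destination.
*/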
277: /* --------------------------------------------------------------------------------------*/
279: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
280: {
281: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
282: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
283: PetscErrorCode ierr;
284: PetscInt ny,bs = in_from->bs;
287: out->ops->begin = in->ops->begin;
288: out->ops->end = in->ops->end;
289: out->ops->copy = in->ops->copy;
290: out->ops->destroy = in->ops->destroy;
291: out->ops->view = in->ops->view;
293: /* allocate entire send scatter context */
294: PetscNewLog(out,&out_to);
295: PetscNewLog(out,&out_from);
297: ny = in_to->starts[in_to->n];
298: out_to->n = in_to->n;
299: out_to->type = in_to->type;
300: out_to->sendfirst = in_to->sendfirst;
302: PetscMalloc1(out_to->n,&out_to->requests);
303: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
304: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
305: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
306: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
307: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
309: out->todata = (void*)out_to;
310: out_to->local.n = in_to->local.n;
311: out_to->local.nonmatching_computed = PETSC_FALSE;
312: out_to->local.n_nonmatching = 0;
313: out_to->local.slots_nonmatching = 0;
314: if (in_to->local.n) {
315: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
316: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
317: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
318: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
319: } else {
320: out_to->local.vslots = 0;
321: out_from->local.vslots = 0;
322: }
324: /* allocate entire receive context */
325: out_from->type = in_from->type;
326: ny = in_from->starts[in_from->n];
327: out_from->n = in_from->n;
328: out_from->sendfirst = in_from->sendfirst;
330: PetscMalloc1(out_from->n,&out_from->requests);
331: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
332: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
333: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
334: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
336: out->fromdata = (void*)out_from;
337: out_from->local.n = in_from->local.n;
338: out_from->local.nonmatching_computed = PETSC_FALSE;
339: out_from->local.n_nonmatching = 0;
340: out_from->local.slots_nonmatching = 0;
342: /*
343: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
344: */
345: {
346: PetscMPIInt tag;
347: MPI_Comm comm;
348: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
349: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
350: PetscInt i;
351: PetscBool flg;
352: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
353: MPI_Request *rev_swaits,*rev_rwaits;
354: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
356: PetscMalloc1(in_to->n,&out_to->rev_requests);
357: PetscMalloc1(in_from->n,&out_from->rev_requests);
359: rev_rwaits = out_to->rev_requests;
360: rev_swaits = out_from->rev_requests;
362: out_from->bs = out_to->bs = bs;
363: tag = ((PetscObject)out)->tag;
364: PetscObjectGetComm((PetscObject)out,&comm);
366: /* Register the receives that you will use later (the same buffers double as sends for the reverse scatter) */
367: for (i=0; i<out_from->n; i++) {
368: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
369: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
370: }
372: flg = PETSC_FALSE;
373: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
374: if (flg) {
375: out_to->use_readyreceiver = PETSC_TRUE;
376: out_from->use_readyreceiver = PETSC_TRUE;
377: for (i=0; i<out_to->n; i++) {
378: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
379: }
380: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
381: MPI_Barrier(comm);
382: PetscInfo(in,"Using VecScatter ready receiver mode\n");
383: } else {
384: out_to->use_readyreceiver = PETSC_FALSE;
385: out_from->use_readyreceiver = PETSC_FALSE;
386: flg = PETSC_FALSE;
387: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
388: if (flg) {
389: PetscInfo(in,"Using VecScatter Ssend mode\n");
390: }
391: for (i=0; i<out_to->n; i++) {
392: if (!flg) {
393: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
394: } else {
395: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
396: }
397: }
398: }
399: /* Register receives for scatter reverse */
400: for (i=0; i<out_to->n; i++) {
401: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
402: }
403: }
404: return(0);
405: }
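/*
Note on the persistent requests registered above: MPI_Send_init()/MPI_Recv_init() only
set the messages up; no data moves until a scatter starts them (e.g. the
MPI_Startall_irecv() call in the ready-receiver branch), and the registrations are
released with MPI_Request_free() in VecScatterDestroy_PtoP().
*/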
407: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
408: {
409: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
410: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
411: PetscErrorCode ierr;
412: PetscInt ny,bs = in_from->bs;
413: PetscMPIInt size;
416: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
418: out->ops->begin = in->ops->begin;
419: out->ops->end = in->ops->end;
420: out->ops->copy = in->ops->copy;
421: out->ops->destroy = in->ops->destroy;
422: out->ops->view = in->ops->view;
424: /* allocate entire send scatter context */
425: PetscNewLog(out,&out_to);
426: PetscNewLog(out,&out_from);
428: ny = in_to->starts[in_to->n];
429: out_to->n = in_to->n;
430: out_to->type = in_to->type;
431: out_to->sendfirst = in_to->sendfirst;
433: PetscMalloc1(out_to->n,&out_to->requests);
434: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
435: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
436: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
437: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
438: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
440: out->todata = (void*)out_to;
441: out_to->local.n = in_to->local.n;
442: out_to->local.nonmatching_computed = PETSC_FALSE;
443: out_to->local.n_nonmatching = 0;
444: out_to->local.slots_nonmatching = 0;
445: if (in_to->local.n) {
446: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
447: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
448: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
449: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
450: } else {
451: out_to->local.vslots = 0;
452: out_from->local.vslots = 0;
453: }
455: /* allocate entire receive context */
456: out_from->type = in_from->type;
457: ny = in_from->starts[in_from->n];
458: out_from->n = in_from->n;
459: out_from->sendfirst = in_from->sendfirst;
461: PetscMalloc1(out_from->n,&out_from->requests);
462: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
463: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
464: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
465: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
467: out->fromdata = (void*)out_from;
468: out_from->local.n = in_from->local.n;
469: out_from->local.nonmatching_computed = PETSC_FALSE;
470: out_from->local.n_nonmatching = 0;
471: out_from->local.slots_nonmatching = 0;
473: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
475: PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
476: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
477: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
479: PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
480: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
481: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
482: return(0);
483: }
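/*
Unlike VecScatterCopy_PtoP_X above, the all-to-all variant registers no persistent
requests; beyond the shared send/receive context it only duplicates the per-rank counts
and displs arrays (each of length size) that the MPI_Alltoallv()-based scatter path
consumes.
*/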
484: /* --------------------------------------------------------------------------------------------------
485: Packs message data into send buffers and unpacks it from receive buffers.
487: These could be generated automatically.
489: Fortran kernels etc. could be used.
490: */
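/*
Illustrative example (not from the source) of the bs = 1 kernels below: with n = 3 and
indicesx = {4,0,2}, Pack_1 gathers y = {x[4],x[0],x[2]} into a contiguous send buffer;
UnPack_1 with indicesy = {4,0,2} performs the inverse, combining a receive buffer into
the destination according to the InsertMode (assign, add, or max).
*/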
491: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
492: {
493: PetscInt i;
494: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
495: }
497: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
498: {
499: PetscInt i;
502: switch (addv) {
503: case INSERT_VALUES:
504: case INSERT_ALL_VALUES:
505: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
506: break;
507: case ADD_VALUES:
508: case ADD_ALL_VALUES:
509: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
510: break;
511: #if !defined(PETSC_USE_COMPLEX)
512: case MAX_VALUES:
513: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
514: #else
515: case MAX_VALUES:
516: #endif
517: case NOT_SET_VALUES:
518: break;
519: default:
520: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
521: }
522: return(0);
523: }
525: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
526: {
527: PetscInt i;
530: switch (addv) {
531: case INSERT_VALUES:
532: case INSERT_ALL_VALUES:
533: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
534: break;
535: case ADD_VALUES:
536: case ADD_ALL_VALUES:
537: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
538: break;
539: #if !defined(PETSC_USE_COMPLEX)
540: case MAX_VALUES:
541: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
542: #else
543: case MAX_VALUES:
544: #endif
545: case NOT_SET_VALUES:
546: break;
547: default:
548: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
549: }
550: return(0);
551: }
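/*
The Pack_2 ... Pack_12, UnPack_2 ... UnPack_12 and Scatter_2 ... Scatter_12 kernels that
follow are manually unrolled copies of the bs = 1 versions above, one per common block
size; any other block size falls back to the generic Pack_bs/UnPack_bs routines at the
end of this group, which use PetscMemcpy() or a loop over the bs entries for each index.
*/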
553: /* ----------------------------------------------------------------------------------------------- */
554: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
555: {
556: PetscInt i,idx;
558: for (i=0; i<n; i++) {
559: idx = *indicesx++;
560: y[0] = x[idx];
561: y[1] = x[idx+1];
562: y += 2;
563: }
564: }
566: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
567: {
568: PetscInt i,idy;
571: switch (addv) {
572: case INSERT_VALUES:
573: case INSERT_ALL_VALUES:
574: for (i=0; i<n; i++) {
575: idy = *indicesy++;
576: y[idy] = x[0];
577: y[idy+1] = x[1];
578: x += 2;
579: }
580: break;
581: case ADD_VALUES:
582: case ADD_ALL_VALUES:
583: for (i=0; i<n; i++) {
584: idy = *indicesy++;
585: y[idy] += x[0];
586: y[idy+1] += x[1];
587: x += 2;
588: }
589: break;
590: #if !defined(PETSC_USE_COMPLEX)
591: case MAX_VALUES:
592: for (i=0; i<n; i++) {
593: idy = *indicesy++;
594: y[idy] = PetscMax(y[idy],x[0]);
595: y[idy+1] = PetscMax(y[idy+1],x[1]);
596: x += 2;
597: }
598: #else
599: case MAX_VALUES:
600: #endif
601: case NOT_SET_VALUES:
602: break;
603: default:
604: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
605: }
606: return(0);
607: }
609: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
610: {
611: PetscInt i,idx,idy;
614: switch (addv) {
615: case INSERT_VALUES:
616: case INSERT_ALL_VALUES:
617: for (i=0; i<n; i++) {
618: idx = *indicesx++;
619: idy = *indicesy++;
620: y[idy] = x[idx];
621: y[idy+1] = x[idx+1];
622: }
623: break;
624: case ADD_VALUES:
625: case ADD_ALL_VALUES:
626: for (i=0; i<n; i++) {
627: idx = *indicesx++;
628: idy = *indicesy++;
629: y[idy] += x[idx];
630: y[idy+1] += x[idx+1];
631: }
632: break;
633: #if !defined(PETSC_USE_COMPLEX)
634: case MAX_VALUES:
635: for (i=0; i<n; i++) {
636: idx = *indicesx++;
637: idy = *indicesy++;
638: y[idy] = PetscMax(y[idy],x[idx]);
639: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
640: }
641: #else
642: case MAX_VALUES:
643: #endif
644: case NOT_SET_VALUES:
645: break;
646: default:
647: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
648: }
649: return(0);
650: }
651: /* ----------------------------------------------------------------------------------------------- */
652: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
653: {
654: PetscInt i,idx;
656: for (i=0; i<n; i++) {
657: idx = *indicesx++;
658: y[0] = x[idx];
659: y[1] = x[idx+1];
660: y[2] = x[idx+2];
661: y += 3;
662: }
663: }
664: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
665: {
666: PetscInt i,idy;
669: switch (addv) {
670: case INSERT_VALUES:
671: case INSERT_ALL_VALUES:
672: for (i=0; i<n; i++) {
673: idy = *indicesy++;
674: y[idy] = x[0];
675: y[idy+1] = x[1];
676: y[idy+2] = x[2];
677: x += 3;
678: }
679: break;
680: case ADD_VALUES:
681: case ADD_ALL_VALUES:
682: for (i=0; i<n; i++) {
683: idy = *indicesy++;
684: y[idy] += x[0];
685: y[idy+1] += x[1];
686: y[idy+2] += x[2];
687: x += 3;
688: }
689: break;
690: #if !defined(PETSC_USE_COMPLEX)
691: case MAX_VALUES:
692: for (i=0; i<n; i++) {
693: idy = *indicesy++;
694: y[idy] = PetscMax(y[idy],x[0]);
695: y[idy+1] = PetscMax(y[idy+1],x[1]);
696: y[idy+2] = PetscMax(y[idy+2],x[2]);
697: x += 3;
698: }
699: #else
700: case MAX_VALUES:
701: #endif
702: case NOT_SET_VALUES:
703: break;
704: default:
705: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
706: }
707: return(0);
708: }
710: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
711: {
712: PetscInt i,idx,idy;
715: switch (addv) {
716: case INSERT_VALUES:
717: case INSERT_ALL_VALUES:
718: for (i=0; i<n; i++) {
719: idx = *indicesx++;
720: idy = *indicesy++;
721: y[idy] = x[idx];
722: y[idy+1] = x[idx+1];
723: y[idy+2] = x[idx+2];
724: }
725: break;
726: case ADD_VALUES:
727: case ADD_ALL_VALUES:
728: for (i=0; i<n; i++) {
729: idx = *indicesx++;
730: idy = *indicesy++;
731: y[idy] += x[idx];
732: y[idy+1] += x[idx+1];
733: y[idy+2] += x[idx+2];
734: }
735: break;
736: #if !defined(PETSC_USE_COMPLEX)
737: case MAX_VALUES:
738: for (i=0; i<n; i++) {
739: idx = *indicesx++;
740: idy = *indicesy++;
741: y[idy] = PetscMax(y[idy],x[idx]);
742: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
743: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
744: }
745: #else
746: case MAX_VALUES:
747: #endif
748: case NOT_SET_VALUES:
749: break;
750: default:
751: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
752: }
753: return(0);
754: }
755: /* ----------------------------------------------------------------------------------------------- */
756: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
757: {
758: PetscInt i,idx;
760: for (i=0; i<n; i++) {
761: idx = *indicesx++;
762: y[0] = x[idx];
763: y[1] = x[idx+1];
764: y[2] = x[idx+2];
765: y[3] = x[idx+3];
766: y += 4;
767: }
768: }
769: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
770: {
771: PetscInt i,idy;
774: switch (addv) {
775: case INSERT_VALUES:
776: case INSERT_ALL_VALUES:
777: for (i=0; i<n; i++) {
778: idy = *indicesy++;
779: y[idy] = x[0];
780: y[idy+1] = x[1];
781: y[idy+2] = x[2];
782: y[idy+3] = x[3];
783: x += 4;
784: }
785: break;
786: case ADD_VALUES:
787: case ADD_ALL_VALUES:
788: for (i=0; i<n; i++) {
789: idy = *indicesy++;
790: y[idy] += x[0];
791: y[idy+1] += x[1];
792: y[idy+2] += x[2];
793: y[idy+3] += x[3];
794: x += 4;
795: }
796: break;
797: #if !defined(PETSC_USE_COMPLEX)
798: case MAX_VALUES:
799: for (i=0; i<n; i++) {
800: idy = *indicesy++;
801: y[idy] = PetscMax(y[idy],x[0]);
802: y[idy+1] = PetscMax(y[idy+1],x[1]);
803: y[idy+2] = PetscMax(y[idy+2],x[2]);
804: y[idy+3] = PetscMax(y[idy+3],x[3]);
805: x += 4;
806: }
807: #else
808: case MAX_VALUES:
809: #endif
810: case NOT_SET_VALUES:
811: break;
812: default:
813: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
814: }
815: return(0);
816: }
818: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
819: {
820: PetscInt i,idx,idy;
823: switch (addv) {
824: case INSERT_VALUES:
825: case INSERT_ALL_VALUES:
826: for (i=0; i<n; i++) {
827: idx = *indicesx++;
828: idy = *indicesy++;
829: y[idy] = x[idx];
830: y[idy+1] = x[idx+1];
831: y[idy+2] = x[idx+2];
832: y[idy+3] = x[idx+3];
833: }
834: break;
835: case ADD_VALUES:
836: case ADD_ALL_VALUES:
837: for (i=0; i<n; i++) {
838: idx = *indicesx++;
839: idy = *indicesy++;
840: y[idy] += x[idx];
841: y[idy+1] += x[idx+1];
842: y[idy+2] += x[idx+2];
843: y[idy+3] += x[idx+3];
844: }
845: break;
846: #if !defined(PETSC_USE_COMPLEX)
847: case MAX_VALUES:
848: for (i=0; i<n; i++) {
849: idx = *indicesx++;
850: idy = *indicesy++;
851: y[idy] = PetscMax(y[idy],x[idx]);
852: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
853: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
854: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
855: }
856: #else
857: case MAX_VALUES:
858: #endif
859: case NOT_SET_VALUES:
860: break;
861: default:
862: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
863: }
864: return(0);
865: }
866: /* ----------------------------------------------------------------------------------------------- */
867: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
868: {
869: PetscInt i,idx;
871: for (i=0; i<n; i++) {
872: idx = *indicesx++;
873: y[0] = x[idx];
874: y[1] = x[idx+1];
875: y[2] = x[idx+2];
876: y[3] = x[idx+3];
877: y[4] = x[idx+4];
878: y += 5;
879: }
880: }
882: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
883: {
884: PetscInt i,idy;
887: switch (addv) {
888: case INSERT_VALUES:
889: case INSERT_ALL_VALUES:
890: for (i=0; i<n; i++) {
891: idy = *indicesy++;
892: y[idy] = x[0];
893: y[idy+1] = x[1];
894: y[idy+2] = x[2];
895: y[idy+3] = x[3];
896: y[idy+4] = x[4];
897: x += 5;
898: }
899: break;
900: case ADD_VALUES:
901: case ADD_ALL_VALUES:
902: for (i=0; i<n; i++) {
903: idy = *indicesy++;
904: y[idy] += x[0];
905: y[idy+1] += x[1];
906: y[idy+2] += x[2];
907: y[idy+3] += x[3];
908: y[idy+4] += x[4];
909: x += 5;
910: }
911: break;
912: #if !defined(PETSC_USE_COMPLEX)
913: case MAX_VALUES:
914: for (i=0; i<n; i++) {
915: idy = *indicesy++;
916: y[idy] = PetscMax(y[idy],x[0]);
917: y[idy+1] = PetscMax(y[idy+1],x[1]);
918: y[idy+2] = PetscMax(y[idy+2],x[2]);
919: y[idy+3] = PetscMax(y[idy+3],x[3]);
920: y[idy+4] = PetscMax(y[idy+4],x[4]);
921: x += 5;
922: }
923: #else
924: case MAX_VALUES:
925: #endif
926: case NOT_SET_VALUES:
927: break;
928: default:
929: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
930: }
931: return(0);
932: }
934: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
935: {
936: PetscInt i,idx,idy;
939: switch (addv) {
940: case INSERT_VALUES:
941: case INSERT_ALL_VALUES:
942: for (i=0; i<n; i++) {
943: idx = *indicesx++;
944: idy = *indicesy++;
945: y[idy] = x[idx];
946: y[idy+1] = x[idx+1];
947: y[idy+2] = x[idx+2];
948: y[idy+3] = x[idx+3];
949: y[idy+4] = x[idx+4];
950: }
951: break;
952: case ADD_VALUES:
953: case ADD_ALL_VALUES:
954: for (i=0; i<n; i++) {
955: idx = *indicesx++;
956: idy = *indicesy++;
957: y[idy] += x[idx];
958: y[idy+1] += x[idx+1];
959: y[idy+2] += x[idx+2];
960: y[idy+3] += x[idx+3];
961: y[idy+4] += x[idx+4];
962: }
963: break;
964: #if !defined(PETSC_USE_COMPLEX)
965: case MAX_VALUES:
966: for (i=0; i<n; i++) {
967: idx = *indicesx++;
968: idy = *indicesy++;
969: y[idy] = PetscMax(y[idy],x[idx]);
970: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
971: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
972: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
973: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
974: }
975: #else
976: case MAX_VALUES:
977: #endif
978: case NOT_SET_VALUES:
979: break;
980: default:
981: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
982: }
983: return(0);
984: }
985: /* ----------------------------------------------------------------------------------------------- */
986: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
987: {
988: PetscInt i,idx;
990: for (i=0; i<n; i++) {
991: idx = *indicesx++;
992: y[0] = x[idx];
993: y[1] = x[idx+1];
994: y[2] = x[idx+2];
995: y[3] = x[idx+3];
996: y[4] = x[idx+4];
997: y[5] = x[idx+5];
998: y += 6;
999: }
1000: }
1002: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1003: {
1004: PetscInt i,idy;
1007: switch (addv) {
1008: case INSERT_VALUES:
1009: case INSERT_ALL_VALUES:
1010: for (i=0; i<n; i++) {
1011: idy = *indicesy++;
1012: y[idy] = x[0];
1013: y[idy+1] = x[1];
1014: y[idy+2] = x[2];
1015: y[idy+3] = x[3];
1016: y[idy+4] = x[4];
1017: y[idy+5] = x[5];
1018: x += 6;
1019: }
1020: break;
1021: case ADD_VALUES:
1022: case ADD_ALL_VALUES:
1023: for (i=0; i<n; i++) {
1024: idy = *indicesy++;
1025: y[idy] += x[0];
1026: y[idy+1] += x[1];
1027: y[idy+2] += x[2];
1028: y[idy+3] += x[3];
1029: y[idy+4] += x[4];
1030: y[idy+5] += x[5];
1031: x += 6;
1032: }
1033: break;
1034: #if !defined(PETSC_USE_COMPLEX)
1035: case MAX_VALUES:
1036: for (i=0; i<n; i++) {
1037: idy = *indicesy++;
1038: y[idy] = PetscMax(y[idy],x[0]);
1039: y[idy+1] = PetscMax(y[idy+1],x[1]);
1040: y[idy+2] = PetscMax(y[idy+2],x[2]);
1041: y[idy+3] = PetscMax(y[idy+3],x[3]);
1042: y[idy+4] = PetscMax(y[idy+4],x[4]);
1043: y[idy+5] = PetscMax(y[idy+5],x[5]);
1044: x += 6;
1045: }
1046: #else
1047: case MAX_VALUES:
1048: #endif
1049: case NOT_SET_VALUES:
1050: break;
1051: default:
1052: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1053: }
1054: return(0);
1055: }
1057: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1058: {
1059: PetscInt i,idx,idy;
1062: switch (addv) {
1063: case INSERT_VALUES:
1064: case INSERT_ALL_VALUES:
1065: for (i=0; i<n; i++) {
1066: idx = *indicesx++;
1067: idy = *indicesy++;
1068: y[idy] = x[idx];
1069: y[idy+1] = x[idx+1];
1070: y[idy+2] = x[idx+2];
1071: y[idy+3] = x[idx+3];
1072: y[idy+4] = x[idx+4];
1073: y[idy+5] = x[idx+5];
1074: }
1075: break;
1076: case ADD_VALUES:
1077: case ADD_ALL_VALUES:
1078: for (i=0; i<n; i++) {
1079: idx = *indicesx++;
1080: idy = *indicesy++;
1081: y[idy] += x[idx];
1082: y[idy+1] += x[idx+1];
1083: y[idy+2] += x[idx+2];
1084: y[idy+3] += x[idx+3];
1085: y[idy+4] += x[idx+4];
1086: y[idy+5] += x[idx+5];
1087: }
1088: break;
1089: #if !defined(PETSC_USE_COMPLEX)
1090: case MAX_VALUES:
1091: for (i=0; i<n; i++) {
1092: idx = *indicesx++;
1093: idy = *indicesy++;
1094: y[idy] = PetscMax(y[idy],x[idx]);
1095: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1096: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1097: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1098: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1099: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1100: }
1101: #else
1102: case MAX_VALUES:
1103: #endif
1104: case NOT_SET_VALUES:
1105: break;
1106: default:
1107: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1108: }
1109: return(0);
1110: }
1111: /* ----------------------------------------------------------------------------------------------- */
1112: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1113: {
1114: PetscInt i,idx;
1116: for (i=0; i<n; i++) {
1117: idx = *indicesx++;
1118: y[0] = x[idx];
1119: y[1] = x[idx+1];
1120: y[2] = x[idx+2];
1121: y[3] = x[idx+3];
1122: y[4] = x[idx+4];
1123: y[5] = x[idx+5];
1124: y[6] = x[idx+6];
1125: y += 7;
1126: }
1127: }
1129: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1130: {
1131: PetscInt i,idy;
1134: switch (addv) {
1135: case INSERT_VALUES:
1136: case INSERT_ALL_VALUES:
1137: for (i=0; i<n; i++) {
1138: idy = *indicesy++;
1139: y[idy] = x[0];
1140: y[idy+1] = x[1];
1141: y[idy+2] = x[2];
1142: y[idy+3] = x[3];
1143: y[idy+4] = x[4];
1144: y[idy+5] = x[5];
1145: y[idy+6] = x[6];
1146: x += 7;
1147: }
1148: break;
1149: case ADD_VALUES:
1150: case ADD_ALL_VALUES:
1151: for (i=0; i<n; i++) {
1152: idy = *indicesy++;
1153: y[idy] += x[0];
1154: y[idy+1] += x[1];
1155: y[idy+2] += x[2];
1156: y[idy+3] += x[3];
1157: y[idy+4] += x[4];
1158: y[idy+5] += x[5];
1159: y[idy+6] += x[6];
1160: x += 7;
1161: }
1162: break;
1163: #if !defined(PETSC_USE_COMPLEX)
1164: case MAX_VALUES:
1165: for (i=0; i<n; i++) {
1166: idy = *indicesy++;
1167: y[idy] = PetscMax(y[idy],x[0]);
1168: y[idy+1] = PetscMax(y[idy+1],x[1]);
1169: y[idy+2] = PetscMax(y[idy+2],x[2]);
1170: y[idy+3] = PetscMax(y[idy+3],x[3]);
1171: y[idy+4] = PetscMax(y[idy+4],x[4]);
1172: y[idy+5] = PetscMax(y[idy+5],x[5]);
1173: y[idy+6] = PetscMax(y[idy+6],x[6]);
1174: x += 7;
1175: }
1176: #else
1177: case MAX_VALUES:
1178: #endif
1179: case NOT_SET_VALUES:
1180: break;
1181: default:
1182: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1183: }
1184: return(0);
1185: }
1187: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1188: {
1189: PetscInt i,idx,idy;
1192: switch (addv) {
1193: case INSERT_VALUES:
1194: case INSERT_ALL_VALUES:
1195: for (i=0; i<n; i++) {
1196: idx = *indicesx++;
1197: idy = *indicesy++;
1198: y[idy] = x[idx];
1199: y[idy+1] = x[idx+1];
1200: y[idy+2] = x[idx+2];
1201: y[idy+3] = x[idx+3];
1202: y[idy+4] = x[idx+4];
1203: y[idy+5] = x[idx+5];
1204: y[idy+6] = x[idx+6];
1205: }
1206: break;
1207: case ADD_VALUES:
1208: case ADD_ALL_VALUES:
1209: for (i=0; i<n; i++) {
1210: idx = *indicesx++;
1211: idy = *indicesy++;
1212: y[idy] += x[idx];
1213: y[idy+1] += x[idx+1];
1214: y[idy+2] += x[idx+2];
1215: y[idy+3] += x[idx+3];
1216: y[idy+4] += x[idx+4];
1217: y[idy+5] += x[idx+5];
1218: y[idy+6] += x[idx+6];
1219: }
1220: break;
1221: #if !defined(PETSC_USE_COMPLEX)
1222: case MAX_VALUES:
1223: for (i=0; i<n; i++) {
1224: idx = *indicesx++;
1225: idy = *indicesy++;
1226: y[idy] = PetscMax(y[idy],x[idx]);
1227: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1228: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1229: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1230: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1231: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1232: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1233: }
1234: #else
1235: case MAX_VALUES:
1236: #endif
1237: case NOT_SET_VALUES:
1238: break;
1239: default:
1240: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1241: }
1242: return(0);
1243: }
1244: /* ----------------------------------------------------------------------------------------------- */
1245: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1246: {
1247: PetscInt i,idx;
1249: for (i=0; i<n; i++) {
1250: idx = *indicesx++;
1251: y[0] = x[idx];
1252: y[1] = x[idx+1];
1253: y[2] = x[idx+2];
1254: y[3] = x[idx+3];
1255: y[4] = x[idx+4];
1256: y[5] = x[idx+5];
1257: y[6] = x[idx+6];
1258: y[7] = x[idx+7];
1259: y += 8;
1260: }
1261: }
1263: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1264: {
1265: PetscInt i,idy;
1268: switch (addv) {
1269: case INSERT_VALUES:
1270: case INSERT_ALL_VALUES:
1271: for (i=0; i<n; i++) {
1272: idy = *indicesy++;
1273: y[idy] = x[0];
1274: y[idy+1] = x[1];
1275: y[idy+2] = x[2];
1276: y[idy+3] = x[3];
1277: y[idy+4] = x[4];
1278: y[idy+5] = x[5];
1279: y[idy+6] = x[6];
1280: y[idy+7] = x[7];
1281: x += 8;
1282: }
1283: break;
1284: case ADD_VALUES:
1285: case ADD_ALL_VALUES:
1286: for (i=0; i<n; i++) {
1287: idy = *indicesy++;
1288: y[idy] += x[0];
1289: y[idy+1] += x[1];
1290: y[idy+2] += x[2];
1291: y[idy+3] += x[3];
1292: y[idy+4] += x[4];
1293: y[idy+5] += x[5];
1294: y[idy+6] += x[6];
1295: y[idy+7] += x[7];
1296: x += 8;
1297: }
1298: break;
1299: #if !defined(PETSC_USE_COMPLEX)
1300: case MAX_VALUES:
1301: for (i=0; i<n; i++) {
1302: idy = *indicesy++;
1303: y[idy] = PetscMax(y[idy],x[0]);
1304: y[idy+1] = PetscMax(y[idy+1],x[1]);
1305: y[idy+2] = PetscMax(y[idy+2],x[2]);
1306: y[idy+3] = PetscMax(y[idy+3],x[3]);
1307: y[idy+4] = PetscMax(y[idy+4],x[4]);
1308: y[idy+5] = PetscMax(y[idy+5],x[5]);
1309: y[idy+6] = PetscMax(y[idy+6],x[6]);
1310: y[idy+7] = PetscMax(y[idy+7],x[7]);
1311: x += 8;
1312: }
1313: #else
1314: case MAX_VALUES:
1315: #endif
1316: case NOT_SET_VALUES:
1317: break;
1318: default:
1319: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1320: }
1321: return(0);
1322: }
1324: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1325: {
1326: PetscInt i,idx,idy;
1329: switch (addv) {
1330: case INSERT_VALUES:
1331: case INSERT_ALL_VALUES:
1332: for (i=0; i<n; i++) {
1333: idx = *indicesx++;
1334: idy = *indicesy++;
1335: y[idy] = x[idx];
1336: y[idy+1] = x[idx+1];
1337: y[idy+2] = x[idx+2];
1338: y[idy+3] = x[idx+3];
1339: y[idy+4] = x[idx+4];
1340: y[idy+5] = x[idx+5];
1341: y[idy+6] = x[idx+6];
1342: y[idy+7] = x[idx+7];
1343: }
1344: break;
1345: case ADD_VALUES:
1346: case ADD_ALL_VALUES:
1347: for (i=0; i<n; i++) {
1348: idx = *indicesx++;
1349: idy = *indicesy++;
1350: y[idy] += x[idx];
1351: y[idy+1] += x[idx+1];
1352: y[idy+2] += x[idx+2];
1353: y[idy+3] += x[idx+3];
1354: y[idy+4] += x[idx+4];
1355: y[idy+5] += x[idx+5];
1356: y[idy+6] += x[idx+6];
1357: y[idy+7] += x[idx+7];
1358: }
1359: break;
1360: #if !defined(PETSC_USE_COMPLEX)
1361: case MAX_VALUES:
1362: for (i=0; i<n; i++) {
1363: idx = *indicesx++;
1364: idy = *indicesy++;
1365: y[idy] = PetscMax(y[idy],x[idx]);
1366: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1367: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1368: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1369: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1370: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1371: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1372: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1373: }
1374: #else
1375: case MAX_VALUES:
1376: #endif
1377: case NOT_SET_VALUES:
1378: break;
1379: default:
1380: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1381: }
1382: return(0);
1383: }
1385: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1386: {
1387: PetscInt i,idx;
1389: for (i=0; i<n; i++) {
1390: idx = *indicesx++;
1391: y[0] = x[idx];
1392: y[1] = x[idx+1];
1393: y[2] = x[idx+2];
1394: y[3] = x[idx+3];
1395: y[4] = x[idx+4];
1396: y[5] = x[idx+5];
1397: y[6] = x[idx+6];
1398: y[7] = x[idx+7];
1399: y[8] = x[idx+8];
1400: y += 9;
1401: }
1402: }
1404: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1405: {
1406: PetscInt i,idy;
1409: switch (addv) {
1410: case INSERT_VALUES:
1411: case INSERT_ALL_VALUES:
1412: for (i=0; i<n; i++) {
1413: idy = *indicesy++;
1414: y[idy] = x[0];
1415: y[idy+1] = x[1];
1416: y[idy+2] = x[2];
1417: y[idy+3] = x[3];
1418: y[idy+4] = x[4];
1419: y[idy+5] = x[5];
1420: y[idy+6] = x[6];
1421: y[idy+7] = x[7];
1422: y[idy+8] = x[8];
1423: x += 9;
1424: }
1425: break;
1426: case ADD_VALUES:
1427: case ADD_ALL_VALUES:
1428: for (i=0; i<n; i++) {
1429: idy = *indicesy++;
1430: y[idy] += x[0];
1431: y[idy+1] += x[1];
1432: y[idy+2] += x[2];
1433: y[idy+3] += x[3];
1434: y[idy+4] += x[4];
1435: y[idy+5] += x[5];
1436: y[idy+6] += x[6];
1437: y[idy+7] += x[7];
1438: y[idy+8] += x[8];
1439: x += 9;
1440: }
1441: break;
1442: #if !defined(PETSC_USE_COMPLEX)
1443: case MAX_VALUES:
1444: for (i=0; i<n; i++) {
1445: idy = *indicesy++;
1446: y[idy] = PetscMax(y[idy],x[0]);
1447: y[idy+1] = PetscMax(y[idy+1],x[1]);
1448: y[idy+2] = PetscMax(y[idy+2],x[2]);
1449: y[idy+3] = PetscMax(y[idy+3],x[3]);
1450: y[idy+4] = PetscMax(y[idy+4],x[4]);
1451: y[idy+5] = PetscMax(y[idy+5],x[5]);
1452: y[idy+6] = PetscMax(y[idy+6],x[6]);
1453: y[idy+7] = PetscMax(y[idy+7],x[7]);
1454: y[idy+8] = PetscMax(y[idy+8],x[8]);
1455: x += 9;
1456: }
1457: #else
1458: case MAX_VALUES:
1459: #endif
1460: case NOT_SET_VALUES:
1461: break;
1462: default:
1463: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1464: }
1465: return(0);
1466: }
1468: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1469: {
1470: PetscInt i,idx,idy;
1473: switch (addv) {
1474: case INSERT_VALUES:
1475: case INSERT_ALL_VALUES:
1476: for (i=0; i<n; i++) {
1477: idx = *indicesx++;
1478: idy = *indicesy++;
1479: y[idy] = x[idx];
1480: y[idy+1] = x[idx+1];
1481: y[idy+2] = x[idx+2];
1482: y[idy+3] = x[idx+3];
1483: y[idy+4] = x[idx+4];
1484: y[idy+5] = x[idx+5];
1485: y[idy+6] = x[idx+6];
1486: y[idy+7] = x[idx+7];
1487: y[idy+8] = x[idx+8];
1488: }
1489: break;
1490: case ADD_VALUES:
1491: case ADD_ALL_VALUES:
1492: for (i=0; i<n; i++) {
1493: idx = *indicesx++;
1494: idy = *indicesy++;
1495: y[idy] += x[idx];
1496: y[idy+1] += x[idx+1];
1497: y[idy+2] += x[idx+2];
1498: y[idy+3] += x[idx+3];
1499: y[idy+4] += x[idx+4];
1500: y[idy+5] += x[idx+5];
1501: y[idy+6] += x[idx+6];
1502: y[idy+7] += x[idx+7];
1503: y[idy+8] += x[idx+8];
1504: }
1505: break;
1506: #if !defined(PETSC_USE_COMPLEX)
1507: case MAX_VALUES:
1508: for (i=0; i<n; i++) {
1509: idx = *indicesx++;
1510: idy = *indicesy++;
1511: y[idy] = PetscMax(y[idy],x[idx]);
1512: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1513: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1514: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1515: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1516: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1517: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1518: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1519: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1520: }
1521: #else
1522: case MAX_VALUES:
1523: #endif
1524: case NOT_SET_VALUES:
1525: break;
1526: default:
1527: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1528: }
1529: return(0);
1530: }
1532: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1533: {
1534: PetscInt i,idx;
1536: for (i=0; i<n; i++) {
1537: idx = *indicesx++;
1538: y[0] = x[idx];
1539: y[1] = x[idx+1];
1540: y[2] = x[idx+2];
1541: y[3] = x[idx+3];
1542: y[4] = x[idx+4];
1543: y[5] = x[idx+5];
1544: y[6] = x[idx+6];
1545: y[7] = x[idx+7];
1546: y[8] = x[idx+8];
1547: y[9] = x[idx+9];
1548: y += 10;
1549: }
1550: }
1552: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1553: {
1554: PetscInt i,idy;
1557: switch (addv) {
1558: case INSERT_VALUES:
1559: case INSERT_ALL_VALUES:
1560: for (i=0; i<n; i++) {
1561: idy = *indicesy++;
1562: y[idy] = x[0];
1563: y[idy+1] = x[1];
1564: y[idy+2] = x[2];
1565: y[idy+3] = x[3];
1566: y[idy+4] = x[4];
1567: y[idy+5] = x[5];
1568: y[idy+6] = x[6];
1569: y[idy+7] = x[7];
1570: y[idy+8] = x[8];
1571: y[idy+9] = x[9];
1572: x += 10;
1573: }
1574: break;
1575: case ADD_VALUES:
1576: case ADD_ALL_VALUES:
1577: for (i=0; i<n; i++) {
1578: idy = *indicesy++;
1579: y[idy] += x[0];
1580: y[idy+1] += x[1];
1581: y[idy+2] += x[2];
1582: y[idy+3] += x[3];
1583: y[idy+4] += x[4];
1584: y[idy+5] += x[5];
1585: y[idy+6] += x[6];
1586: y[idy+7] += x[7];
1587: y[idy+8] += x[8];
1588: y[idy+9] += x[9];
1589: x += 10;
1590: }
1591: break;
1592: #if !defined(PETSC_USE_COMPLEX)
1593: case MAX_VALUES:
1594: for (i=0; i<n; i++) {
1595: idy = *indicesy++;
1596: y[idy] = PetscMax(y[idy],x[0]);
1597: y[idy+1] = PetscMax(y[idy+1],x[1]);
1598: y[idy+2] = PetscMax(y[idy+2],x[2]);
1599: y[idy+3] = PetscMax(y[idy+3],x[3]);
1600: y[idy+4] = PetscMax(y[idy+4],x[4]);
1601: y[idy+5] = PetscMax(y[idy+5],x[5]);
1602: y[idy+6] = PetscMax(y[idy+6],x[6]);
1603: y[idy+7] = PetscMax(y[idy+7],x[7]);
1604: y[idy+8] = PetscMax(y[idy+8],x[8]);
1605: y[idy+9] = PetscMax(y[idy+9],x[9]);
1606: x += 10;
1607: }
1608: #else
1609: case MAX_VALUES:
1610: #endif
1611: case NOT_SET_VALUES:
1612: break;
1613: default:
1614: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1615: }
1616: return(0);
1617: }
1619: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1620: {
1621: PetscInt i,idx,idy;
1624: switch (addv) {
1625: case INSERT_VALUES:
1626: case INSERT_ALL_VALUES:
1627: for (i=0; i<n; i++) {
1628: idx = *indicesx++;
1629: idy = *indicesy++;
1630: y[idy] = x[idx];
1631: y[idy+1] = x[idx+1];
1632: y[idy+2] = x[idx+2];
1633: y[idy+3] = x[idx+3];
1634: y[idy+4] = x[idx+4];
1635: y[idy+5] = x[idx+5];
1636: y[idy+6] = x[idx+6];
1637: y[idy+7] = x[idx+7];
1638: y[idy+8] = x[idx+8];
1639: y[idy+9] = x[idx+9];
1640: }
1641: break;
1642: case ADD_VALUES:
1643: case ADD_ALL_VALUES:
1644: for (i=0; i<n; i++) {
1645: idx = *indicesx++;
1646: idy = *indicesy++;
1647: y[idy] += x[idx];
1648: y[idy+1] += x[idx+1];
1649: y[idy+2] += x[idx+2];
1650: y[idy+3] += x[idx+3];
1651: y[idy+4] += x[idx+4];
1652: y[idy+5] += x[idx+5];
1653: y[idy+6] += x[idx+6];
1654: y[idy+7] += x[idx+7];
1655: y[idy+8] += x[idx+8];
1656: y[idy+9] += x[idx+9];
1657: }
1658: break;
1659: #if !defined(PETSC_USE_COMPLEX)
1660: case MAX_VALUES:
1661: for (i=0; i<n; i++) {
1662: idx = *indicesx++;
1663: idy = *indicesy++;
1664: y[idy] = PetscMax(y[idy],x[idx]);
1665: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1666: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1667: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1668: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1669: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1670: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1671: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1672: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1673: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1674: }
1675: #else
1676: case MAX_VALUES:
1677: #endif
1678: case NOT_SET_VALUES:
1679: break;
1680: default:
1681: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1682: }
1683: return(0);
1684: }
1686: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1687: {
1688: PetscInt i,idx;
1690: for (i=0; i<n; i++) {
1691: idx = *indicesx++;
1692: y[0] = x[idx];
1693: y[1] = x[idx+1];
1694: y[2] = x[idx+2];
1695: y[3] = x[idx+3];
1696: y[4] = x[idx+4];
1697: y[5] = x[idx+5];
1698: y[6] = x[idx+6];
1699: y[7] = x[idx+7];
1700: y[8] = x[idx+8];
1701: y[9] = x[idx+9];
1702: y[10] = x[idx+10];
1703: y += 11;
1704: }
1705: }
1707: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1708: {
1709: PetscInt i,idy;
1712: switch (addv) {
1713: case INSERT_VALUES:
1714: case INSERT_ALL_VALUES:
1715: for (i=0; i<n; i++) {
1716: idy = *indicesy++;
1717: y[idy] = x[0];
1718: y[idy+1] = x[1];
1719: y[idy+2] = x[2];
1720: y[idy+3] = x[3];
1721: y[idy+4] = x[4];
1722: y[idy+5] = x[5];
1723: y[idy+6] = x[6];
1724: y[idy+7] = x[7];
1725: y[idy+8] = x[8];
1726: y[idy+9] = x[9];
1727: y[idy+10] = x[10];
1728: x += 11;
1729: }
1730: break;
1731: case ADD_VALUES:
1732: case ADD_ALL_VALUES:
1733: for (i=0; i<n; i++) {
1734: idy = *indicesy++;
1735: y[idy] += x[0];
1736: y[idy+1] += x[1];
1737: y[idy+2] += x[2];
1738: y[idy+3] += x[3];
1739: y[idy+4] += x[4];
1740: y[idy+5] += x[5];
1741: y[idy+6] += x[6];
1742: y[idy+7] += x[7];
1743: y[idy+8] += x[8];
1744: y[idy+9] += x[9];
1745: y[idy+10] += x[10];
1746: x += 11;
1747: }
1748: break;
1749: #if !defined(PETSC_USE_COMPLEX)
1750: case MAX_VALUES:
1751: for (i=0; i<n; i++) {
1752: idy = *indicesy++;
1753: y[idy] = PetscMax(y[idy],x[0]);
1754: y[idy+1] = PetscMax(y[idy+1],x[1]);
1755: y[idy+2] = PetscMax(y[idy+2],x[2]);
1756: y[idy+3] = PetscMax(y[idy+3],x[3]);
1757: y[idy+4] = PetscMax(y[idy+4],x[4]);
1758: y[idy+5] = PetscMax(y[idy+5],x[5]);
1759: y[idy+6] = PetscMax(y[idy+6],x[6]);
1760: y[idy+7] = PetscMax(y[idy+7],x[7]);
1761: y[idy+8] = PetscMax(y[idy+8],x[8]);
1762: y[idy+9] = PetscMax(y[idy+9],x[9]);
1763: y[idy+10] = PetscMax(y[idy+10],x[10]);
1764: x += 11;
1765: }
1766: #else
1767: case MAX_VALUES:
1768: #endif
1769: case NOT_SET_VALUES:
1770: break;
1771: default:
1772: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1773: }
1774: return(0);
1775: }
1777: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1778: {
1779: PetscInt i,idx,idy;
1782: switch (addv) {
1783: case INSERT_VALUES:
1784: case INSERT_ALL_VALUES:
1785: for (i=0; i<n; i++) {
1786: idx = *indicesx++;
1787: idy = *indicesy++;
1788: y[idy] = x[idx];
1789: y[idy+1] = x[idx+1];
1790: y[idy+2] = x[idx+2];
1791: y[idy+3] = x[idx+3];
1792: y[idy+4] = x[idx+4];
1793: y[idy+5] = x[idx+5];
1794: y[idy+6] = x[idx+6];
1795: y[idy+7] = x[idx+7];
1796: y[idy+8] = x[idx+8];
1797: y[idy+9] = x[idx+9];
1798: y[idy+10] = x[idx+10];
1799: }
1800: break;
1801: case ADD_VALUES:
1802: case ADD_ALL_VALUES:
1803: for (i=0; i<n; i++) {
1804: idx = *indicesx++;
1805: idy = *indicesy++;
1806: y[idy] += x[idx];
1807: y[idy+1] += x[idx+1];
1808: y[idy+2] += x[idx+2];
1809: y[idy+3] += x[idx+3];
1810: y[idy+4] += x[idx+4];
1811: y[idy+5] += x[idx+5];
1812: y[idy+6] += x[idx+6];
1813: y[idy+7] += x[idx+7];
1814: y[idy+8] += x[idx+8];
1815: y[idy+9] += x[idx+9];
1816: y[idy+10] += x[idx+10];
1817: }
1818: break;
1819: #if !defined(PETSC_USE_COMPLEX)
1820: case MAX_VALUES:
1821: for (i=0; i<n; i++) {
1822: idx = *indicesx++;
1823: idy = *indicesy++;
1824: y[idy] = PetscMax(y[idy],x[idx]);
1825: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1826: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1827: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1828: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1829: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1830: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1831: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1832: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1833: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1834: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1835: }
1836: #else
1837: case MAX_VALUES:
1838: #endif
1839: case NOT_SET_VALUES:
1840: break;
1841: default:
1842: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1843: }
1844: return(0);
1845: }
1847: /* ----------------------------------------------------------------------------------------------- */
1848: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1849: {
1850: PetscInt i,idx;
1852: for (i=0; i<n; i++) {
1853: idx = *indicesx++;
1854: y[0] = x[idx];
1855: y[1] = x[idx+1];
1856: y[2] = x[idx+2];
1857: y[3] = x[idx+3];
1858: y[4] = x[idx+4];
1859: y[5] = x[idx+5];
1860: y[6] = x[idx+6];
1861: y[7] = x[idx+7];
1862: y[8] = x[idx+8];
1863: y[9] = x[idx+9];
1864: y[10] = x[idx+10];
1865: y[11] = x[idx+11];
1866: y += 12;
1867: }
1868: }
1870: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1871: {
1872: PetscInt i,idy;
1875: switch (addv) {
1876: case INSERT_VALUES:
1877: case INSERT_ALL_VALUES:
1878: for (i=0; i<n; i++) {
1879: idy = *indicesy++;
1880: y[idy] = x[0];
1881: y[idy+1] = x[1];
1882: y[idy+2] = x[2];
1883: y[idy+3] = x[3];
1884: y[idy+4] = x[4];
1885: y[idy+5] = x[5];
1886: y[idy+6] = x[6];
1887: y[idy+7] = x[7];
1888: y[idy+8] = x[8];
1889: y[idy+9] = x[9];
1890: y[idy+10] = x[10];
1891: y[idy+11] = x[11];
1892: x += 12;
1893: }
1894: break;
1895: case ADD_VALUES:
1896: case ADD_ALL_VALUES:
1897: for (i=0; i<n; i++) {
1898: idy = *indicesy++;
1899: y[idy] += x[0];
1900: y[idy+1] += x[1];
1901: y[idy+2] += x[2];
1902: y[idy+3] += x[3];
1903: y[idy+4] += x[4];
1904: y[idy+5] += x[5];
1905: y[idy+6] += x[6];
1906: y[idy+7] += x[7];
1907: y[idy+8] += x[8];
1908: y[idy+9] += x[9];
1909: y[idy+10] += x[10];
1910: y[idy+11] += x[11];
1911: x += 12;
1912: }
1913: break;
1914: #if !defined(PETSC_USE_COMPLEX)
1915: case MAX_VALUES:
1916: for (i=0; i<n; i++) {
1917: idy = *indicesy++;
1918: y[idy] = PetscMax(y[idy],x[0]);
1919: y[idy+1] = PetscMax(y[idy+1],x[1]);
1920: y[idy+2] = PetscMax(y[idy+2],x[2]);
1921: y[idy+3] = PetscMax(y[idy+3],x[3]);
1922: y[idy+4] = PetscMax(y[idy+4],x[4]);
1923: y[idy+5] = PetscMax(y[idy+5],x[5]);
1924: y[idy+6] = PetscMax(y[idy+6],x[6]);
1925: y[idy+7] = PetscMax(y[idy+7],x[7]);
1926: y[idy+8] = PetscMax(y[idy+8],x[8]);
1927: y[idy+9] = PetscMax(y[idy+9],x[9]);
1928: y[idy+10] = PetscMax(y[idy+10],x[10]);
1929: y[idy+11] = PetscMax(y[idy+11],x[11]);
1930: x += 12;
1931: }
1932: #else
1933: case MAX_VALUES:
1934: #endif
1935: case NOT_SET_VALUES:
1936: break;
1937: default:
1938: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1939: }
1940: return(0);
1941: }
1943: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1944: {
1945: PetscInt i,idx,idy;
1948: switch (addv) {
1949: case INSERT_VALUES:
1950: case INSERT_ALL_VALUES:
1951: for (i=0; i<n; i++) {
1952: idx = *indicesx++;
1953: idy = *indicesy++;
1954: y[idy] = x[idx];
1955: y[idy+1] = x[idx+1];
1956: y[idy+2] = x[idx+2];
1957: y[idy+3] = x[idx+3];
1958: y[idy+4] = x[idx+4];
1959: y[idy+5] = x[idx+5];
1960: y[idy+6] = x[idx+6];
1961: y[idy+7] = x[idx+7];
1962: y[idy+8] = x[idx+8];
1963: y[idy+9] = x[idx+9];
1964: y[idy+10] = x[idx+10];
1965: y[idy+11] = x[idx+11];
1966: }
1967: break;
1968: case ADD_VALUES:
1969: case ADD_ALL_VALUES:
1970: for (i=0; i<n; i++) {
1971: idx = *indicesx++;
1972: idy = *indicesy++;
1973: y[idy] += x[idx];
1974: y[idy+1] += x[idx+1];
1975: y[idy+2] += x[idx+2];
1976: y[idy+3] += x[idx+3];
1977: y[idy+4] += x[idx+4];
1978: y[idy+5] += x[idx+5];
1979: y[idy+6] += x[idx+6];
1980: y[idy+7] += x[idx+7];
1981: y[idy+8] += x[idx+8];
1982: y[idy+9] += x[idx+9];
1983: y[idy+10] += x[idx+10];
1984: y[idy+11] += x[idx+11];
1985: }
1986: break;
1987: #if !defined(PETSC_USE_COMPLEX)
1988: case MAX_VALUES:
1989: for (i=0; i<n; i++) {
1990: idx = *indicesx++;
1991: idy = *indicesy++;
1992: y[idy] = PetscMax(y[idy],x[idx]);
1993: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1994: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1995: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1996: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1997: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1998: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1999: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
2000: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
2001: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
2002: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2003: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2004: }
2005: #else
2006: case MAX_VALUES:
2007: #endif
2008: case NOT_SET_VALUES:
2009: break;
2010: default:
2011: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2012: }
2013: return(0);
2014: }
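/*
   Illustrative sketch (editorial; not part of the original source): how the
   unrolled block-size-12 kernels above compose into a gather/scatter round
   trip.  The helper name is hypothetical and the caller is assumed to supply
   a buffer buf with room for n*12 scalars.
*/
PETSC_STATIC_INLINE PetscErrorCode PackUnPackRoundTrip_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,PetscScalar *buf)
{
  Pack_12(n,indicesx,x,buf,12);                        /* gather n blocks of 12 entries of x into the contiguous buffer */
  return UnPack_12(n,buf,indicesy,y,INSERT_VALUES,12); /* write each buffered block into y at its target offset */
}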
2016: /* ----------------------------------------------------------------------------------------------- */
2017: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2018: {
2019: PetscInt i,idx;
2021: for (i=0; i<n; i++) {
2022: idx = *indicesx++;
2023: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2024: y += bs;
2025: }
2026: }
2028: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2029: {
2030: PetscInt i,idy,j;
2033: switch (addv) {
2034: case INSERT_VALUES:
2035: case INSERT_ALL_VALUES:
2036: for (i=0; i<n; i++) {
2037: idy = *indicesy++;
2038: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2039: x += bs;
2040: }
2041: break;
2042: case ADD_VALUES:
2043: case ADD_ALL_VALUES:
2044: for (i=0; i<n; i++) {
2045: idy = *indicesy++;
2046: for (j=0; j<bs; j++) y[idy+j] += x[j];
2047: x += bs;
2048: }
2049: break;
2050: #if !defined(PETSC_USE_COMPLEX)
2051: case MAX_VALUES:
2052: for (i=0; i<n; i++) {
2053: idy = *indicesy++;
2054: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2055: x += bs;
2056: }
2057: #else
2058: case MAX_VALUES:
2059: #endif
2060: case NOT_SET_VALUES:
2061: break;
2062: default:
2063: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2064: }
2065: return(0);
2066: }
2068: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2069: {
2070: PetscInt i,idx,idy,j;
2073: switch (addv) {
2074: case INSERT_VALUES:
2075: case INSERT_ALL_VALUES:
2076: for (i=0; i<n; i++) {
2077: idx = *indicesx++;
2078: idy = *indicesy++;
2079: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2080: }
2081: break;
2082: case ADD_VALUES:
2083: case ADD_ALL_VALUES:
2084: for (i=0; i<n; i++) {
2085: idx = *indicesx++;
2086: idy = *indicesy++;
2087: for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2088: }
2089: break;
2090: #if !defined(PETSC_USE_COMPLEX)
2091: case MAX_VALUES:
2092: for (i=0; i<n; i++) {
2093: idx = *indicesx++;
2094: idy = *indicesy++;
2095: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2096: }
2097: #else
2098: case MAX_VALUES:
2099: #endif
2100: case NOT_SET_VALUES:
2101: break;
2102: default:
2103: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2104: }
2105: return(0);
2106: }
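/*
   Editorial note: Pack_bs/UnPack_bs/Scatter_bs above are the generic
   fallbacks that honor the runtime bs argument (PetscMemcpy for the copy
   cases, short j-loops otherwise); the dispatch in
   VecScatterCreateCommon_PtoS() below selects the unrolled kernels for
   bs = 1..12 and these generic ones for any larger block size.
*/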
2108: /* Create the VecScatterBegin/End_<BS> functions for our chosen block sizes; see the sketch after these includes */
2109: #define BS 1
2110: #include <../src/vec/vec/utils/vpscat.h>
2111: #define BS 2
2112: #include <../src/vec/vec/utils/vpscat.h>
2113: #define BS 3
2114: #include <../src/vec/vec/utils/vpscat.h>
2115: #define BS 4
2116: #include <../src/vec/vec/utils/vpscat.h>
2117: #define BS 5
2118: #include <../src/vec/vec/utils/vpscat.h>
2119: #define BS 6
2120: #include <../src/vec/vec/utils/vpscat.h>
2121: #define BS 7
2122: #include <../src/vec/vec/utils/vpscat.h>
2123: #define BS 8
2124: #include <../src/vec/vec/utils/vpscat.h>
2125: #define BS 9
2126: #include <../src/vec/vec/utils/vpscat.h>
2127: #define BS 10
2128: #include <../src/vec/vec/utils/vpscat.h>
2129: #define BS 11
2130: #include <../src/vec/vec/utils/vpscat.h>
2131: #define BS 12
2132: #include <../src/vec/vec/utils/vpscat.h>
2133: #define BS bs
2134: #include <../src/vec/vec/utils/vpscat.h>
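/*
   Illustrative sketch (hypothetical; not the verbatim contents of vpscat.h):
   the header is included once per block size with BS predefined and is
   expected to paste BS onto the generated function names, roughly as in

     #define PETSCMAP1_a(a,b)  a ## _ ## b
     #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
     #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)

     PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx, ...)

   so that BS defined as 12 yields VecScatterBegin_12/VecScatterEnd_12, while
   the final include with BS defined as the identifier bs yields
   VecScatterBegin_bs, which forwards the runtime block size to the *_bs
   kernels above.  The header presumably #undef's BS so that each
   #define/#include pair is independent.
*/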
2136: /* ==========================================================================================*/
2138: /* create parallel to sequential scatter context */
2140: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2142: /*@
2143: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
2145: Collective on VecScatter
2147: Input Parameters:
2148: + ctx - VecScatter context, obtained with VecScatterCreateEmpty()
2149: . nsends - number of messages this process will send
2150: . sendSizes - number of entries in each message to be sent
2151: . sendProcs - ranks of the processes the messages are sent to
2152: . sendIdx - indices from which the sent entries are obtained (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2153: . nrecvs - number of receives to expect
2154: . recvSizes - number of entries in each message to be received
2155: . recvProcs - ranks of the processes that send to this process
2156: . recvIdx - indices where the received entries are to be placed (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2157: - bs - size of block
2159: Notes: sendSizes[] and recvSizes[] cannot contain any 0 entries. To support 0 entries, the code below
2160: would need to "compress out" the sendProcs[] and recvProcs[] entries whose sizes are 0.
2162: This probably does not handle sends to self properly; those should be removed from the counts used
2163: to allocate space inside the from struct.
2165: Level: intermediate
2167: @*/
2168: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2169: {
2170: VecScatter_MPI_General *from, *to;
2171: PetscInt sendSize, recvSize;
2172: PetscInt n, i;
2173: PetscErrorCode ierr;
2175: /* allocate entire send scatter context */
2176: PetscNewLog(ctx,&to);
2177: to->n = nsends;
2178: for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];
2180: PetscMalloc1(to->n,&to->requests);
2181: PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2182: PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);
2184: to->starts[0] = 0;
2185: for (n = 0; n < to->n; n++) {
2186: if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2187: to->starts[n+1] = to->starts[n] + sendSizes[n];
2188: to->procs[n] = sendProcs[n];
2189: for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2190: }
2191: ctx->todata = (void*) to;
2193: /* allocate entire receive scatter context */
2194: PetscNewLog(ctx,&from);
2195: from->n = nrecvs;
2196: for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];
2198: PetscMalloc1(from->n,&from->requests);
2199: PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);
2201: from->starts[0] = 0;
2202: for (n = 0; n < from->n; n++) {
2203: if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2204: from->starts[n+1] = from->starts[n] + recvSizes[n];
2205: from->procs[n] = recvProcs[n];
2206: for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2207: }
2208: ctx->fromdata = (void*)from;
2210: /* No local scatter optimization */
2211: from->local.n = 0;
2212: from->local.vslots = 0;
2213: to->local.n = 0;
2214: to->local.vslots = 0;
2215: from->local.nonmatching_computed = PETSC_FALSE;
2216: from->local.n_nonmatching = 0;
2217: from->local.slots_nonmatching = 0;
2218: to->local.nonmatching_computed = PETSC_FALSE;
2219: to->local.n_nonmatching = 0;
2220: to->local.slots_nonmatching = 0;
2222: from->type = VEC_SCATTER_MPI_GENERAL;
2223: to->type = VEC_SCATTER_MPI_GENERAL;
2224: from->bs = bs;
2225: to->bs = bs;
2226: VecScatterCreateCommon_PtoS(from, to, ctx);
2228: /* mark lengths as negative so it won't check local vector lengths */
2229: ctx->from_n = ctx->to_n = -1;
2230: return(0);
2231: }
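/*
   Usage sketch (illustrative; the array contents are hypothetical): a rank
   that sends 3 entries to rank 1 and receives 3 entries from rank 2, with
   block size 1.

     const PetscInt sendSizes[1] = {3}, sendProcs[1] = {1}, sendIdx[3] = {0,1,2};
     const PetscInt recvSizes[1] = {3}, recvProcs[1] = {2}, recvIdx[3] = {4,5,6};
     VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);

   Per the Notes above, none of the size entries may be zero.
*/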
2233: /*
2234: bs indicates how many elements there are in each block. Normally this would be 1.
2236: Contains a check that PetscMPIInt can handle the sizes needed.
2237: */
2238: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2239: {
2240: VecScatter_MPI_General *from,*to;
2241: PetscMPIInt size,rank,imdex,tag,n;
2242: PetscInt *source = NULL,*owners = NULL,nxr;
2243: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2244: PetscMPIInt *nprocs = NULL,nrecvs;
2245: PetscInt i,j,idx,nsends;
2246: PetscMPIInt *owner = NULL;
2247: PetscInt *starts = NULL,count,slen;
2248: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2249: PetscMPIInt *onodes1,*olengths1;
2250: MPI_Comm comm;
2251: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2252: MPI_Status recv_status,*send_status;
2253: PetscErrorCode ierr;
2256: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2257: PetscObjectGetComm((PetscObject)xin,&comm);
2258: MPI_Comm_rank(comm,&rank);
2259: MPI_Comm_size(comm,&size);
2260: owners = xin->map->range;
2261: VecGetSize(yin,&lengthy);
2262: VecGetSize(xin,&lengthx);
2264: /* first count number of contributors to each processor */
2265: PetscMalloc2(size,&nprocs,nx,&owner);
2266: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2268: j = 0;
2269: nsends = 0;
2270: for (i=0; i<nx; i++) {
2271: idx = bs*inidx[i];
2272: if (idx < owners[j]) j = 0;
2273: for (; j<size; j++) {
2274: if (idx < owners[j+1]) {
2275: if (!nprocs[j]++) nsends++;
2276: owner[i] = j;
2277: break;
2278: }
2279: }
2280: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"i=%D: block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2281: }
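  /*
     Worked example (illustrative): with size=3, ownership ranges
     owners = {0,4,8,12}, bs=1 and inidx = {1,5,9,6}, the loop above produces
     owner = {0,1,2,1} and nprocs = {1,2,1}, hence nsends = 3.  Keeping the
     running index j exploits locally sorted indices; the reset to 0 handles
     out-of-order ones.
  */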
2283: nprocslocal = nprocs[rank];
2284: nprocs[rank] = 0;
2285: if (nprocslocal) nsends--;
2286: /* inform other processors of number of messages and max length */
2287: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2288: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2289: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2290: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2292: /* post receives: */
2293: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2294: count = 0;
2295: for (i=0; i<nrecvs; i++) {
2296: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2297: count += olengths1[i];
2298: }
2300: /* do sends:
2301: starts[i] gives the starting index in svalues for the entries going to
2302: the ith processor
2303: */
2304: nxr = 0;
2305: for (i=0; i<nx; i++) {
2306: if (owner[i] != rank) nxr++;
2307: }
2308: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2310: starts[0] = 0;
2311: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2312: for (i=0; i<nx; i++) {
2313: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2314: }
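  /* the placement pass above advanced each starts[owner[i]] as it filled
     svalues, so the prefix sums are rebuilt below (now through starts[size])
     before the sends are posted */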
2315: starts[0] = 0;
2316: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2317: count = 0;
2318: for (i=0; i<size; i++) {
2319: if (nprocs[i]) {
2320: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2321: }
2322: }
2324: /* wait on receives */
2325: count = nrecvs;
2326: slen = 0;
2327: while (count) {
2328: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2329: /* unpack receives into our local space */
2330: MPI_Get_count(&recv_status,MPIU_INT,&n);
2331: slen += n;
2332: count--;
2333: }
2335: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2337: /* allocate entire send scatter context */
2338: PetscNewLog(ctx,&to);
2339: to->n = nrecvs;
2341: PetscMalloc1(nrecvs,&to->requests);
2342: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2343: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2345: ctx->todata = (void*)to;
2346: to->starts[0] = 0;
2348: if (nrecvs) {
2349: /* move the data into the send scatter */
2350: base = owners[rank];
2351: rsvalues = rvalues;
2352: for (i=0; i<nrecvs; i++) {
2353: to->starts[i+1] = to->starts[i] + olengths1[i];
2354: to->procs[i] = onodes1[i];
2355: values = rsvalues;
2356: rsvalues += olengths1[i];
2357: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2358: }
2359: }
2360: PetscFree(olengths1);
2361: PetscFree(onodes1);
2362: PetscFree3(rvalues,source,recv_waits);
2364: /* allocate entire receive scatter context */
2365: PetscNewLog(ctx,&from);
2366: from->n = nsends;
2368: PetscMalloc1(nsends,&from->requests);
2369: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2370: ctx->fromdata = (void*)from;
2372: /* move data into receive scatter */
2373: PetscMalloc2(size,&lowner,nsends+1,&start);
2374: count = 0; from->starts[0] = start[0] = 0;
2375: for (i=0; i<size; i++) {
2376: if (nprocs[i]) {
2377: lowner[i] = count;
2378: from->procs[count++] = i;
2379: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2380: }
2381: }
2383: for (i=0; i<nx; i++) {
2384: if (owner[i] != rank) {
2385: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2386: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2387: }
2388: }
2389: PetscFree2(lowner,start);
2390: PetscFree2(nprocs,owner);
2392: /* wait on sends */
2393: if (nsends) {
2394: PetscMalloc1(nsends,&send_status);
2395: MPI_Waitall(nsends,send_waits,send_status);
2396: PetscFree(send_status);
2397: }
2398: PetscFree3(svalues,send_waits,starts);
2400: if (nprocslocal) {
2401: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2402: /* we have a scatter to ourselves */
2403: PetscMalloc1(nt,&to->local.vslots);
2404: PetscMalloc1(nt,&from->local.vslots);
2405: nt = 0;
2406: for (i=0; i<nx; i++) {
2407: idx = bs*inidx[i];
2408: if (idx >= owners[rank] && idx < owners[rank+1]) {
2409: to->local.vslots[nt] = idx - owners[rank];
2410: from->local.vslots[nt++] = bs*inidy[i];
2411: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2412: }
2413: }
2414: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2415: } else {
2416: from->local.n = 0;
2417: from->local.vslots = 0;
2418: to->local.n = 0;
2419: to->local.vslots = 0;
2420: }
2422: from->local.nonmatching_computed = PETSC_FALSE;
2423: from->local.n_nonmatching = 0;
2424: from->local.slots_nonmatching = 0;
2425: to->local.nonmatching_computed = PETSC_FALSE;
2426: to->local.n_nonmatching = 0;
2427: to->local.slots_nonmatching = 0;
2429: from->type = VEC_SCATTER_MPI_GENERAL;
2430: to->type = VEC_SCATTER_MPI_GENERAL;
2431: from->bs = bs;
2432: to->bs = bs;
2434: VecScatterCreateCommon_PtoS(from,to,ctx);
2435: return(0);
2436: }
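/*
   Illustrative sketch (editorial): this routine is normally reached through
   the public VecScatterCreate() interface, e.g.

     IS         ix,iy;
     VecScatter sc;
     ISCreateGeneral(comm,n,xidx,PETSC_COPY_VALUES,&ix);
     ISCreateGeneral(comm,n,yidx,PETSC_COPY_VALUES,&iy);
     VecScatterCreate(x,ix,y,iy,&sc);

   with x parallel and y sequential for this parallel-to-sequential case;
   comm, n, xidx, yidx, x and y are assumed to be set up by the caller.
*/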
2438: /*
2439: bs indicates how many elements there are in each block. Normally this would be 1.
2440: */
2441: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2442: {
2443: MPI_Comm comm;
2444: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2445: PetscInt bs = to->bs;
2446: PetscMPIInt size;
2447: PetscInt i, n;
2451: PetscObjectGetComm((PetscObject)ctx,&comm);
2452: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2453: ctx->ops->destroy = VecScatterDestroy_PtoP;
2455: ctx->reproduce = PETSC_FALSE;
2456: to->sendfirst = PETSC_FALSE;
2457: PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2458: PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2459: from->sendfirst = to->sendfirst;
2461: MPI_Comm_size(comm,&size);
2462: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2463: to->contiq = PETSC_FALSE;
2464: n = from->starts[from->n];
2465: from->contiq = PETSC_TRUE;
2466: for (i=1; i<n; i++) {
2467: if (from->indices[i] != from->indices[i-1] + bs) {
2468: from->contiq = PETSC_FALSE;
2469: break;
2470: }
2471: }
2473: to->use_alltoallv = PETSC_FALSE;
2474: PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2475: from->use_alltoallv = to->use_alltoallv;
2476: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2477: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2478: if (to->use_alltoallv) {
2479: to->use_alltoallw = PETSC_FALSE;
2480: PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2481: }
2482: from->use_alltoallw = to->use_alltoallw;
2483: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2484: #endif
2486: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2487: to->use_window = PETSC_FALSE;
2488: PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2489: from->use_window = to->use_window;
2490: #endif
2492: if (to->use_alltoallv) {
2494: PetscMalloc2(size,&to->counts,size,&to->displs);
2495: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2496: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
2498: to->displs[0] = 0;
2499: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
2501: PetscMalloc2(size,&from->counts,size,&from->displs);
2502: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2503: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2504: from->displs[0] = 0;
2505: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
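    /*
       Worked example (illustrative): on 3 ranks with bs=2, if this rank sends
       blocks of lengths {3,1} to procs {2,0} (so to->starts = {0,3,4}), the
       loops above give to->counts = {2,0,6} and to->displs = {0,2,2}.
    */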
2507: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2508: if (to->use_alltoallw) {
2509: PetscMPIInt mpibs, mpilen;
2511: ctx->packtogether = PETSC_FALSE;
2512: PetscMPIIntCast(bs,&mpibs);
2513: PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2514: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2515: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2516: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
2518: for (i=0; i<to->n; i++) {
2519: to->wcounts[to->procs[i]] = 1;
2520: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2521: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2522: MPI_Type_commit(to->types+to->procs[i]);
2523: }
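      /* each derived datatype above selects mpilen blocks of mpibs scalars
         directly from the vector at the displacements in to->indices, so the
         alltoallw path can communicate without packing into to->values */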
2524: PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2525: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2526: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2527: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2529: if (from->contiq) {
2530: PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2531: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2533: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2534: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2536: } else {
2537: for (i=0; i<from->n; i++) {
2538: from->wcounts[from->procs[i]] = 1;
2539: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2540: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2541: MPI_Type_commit(from->types+from->procs[i]);
2542: }
2543: }
2544: } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2546: #else
2547: to->use_alltoallw = PETSC_FALSE;
2548: from->use_alltoallw = PETSC_FALSE;
2549: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2550: #endif
2551: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2552: } else if (to->use_window) {
2553: PetscMPIInt temptag,winsize;
2554: MPI_Request *request;
2555: MPI_Status *status;
2557: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2558: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2559: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2560: PetscMalloc1(to->n,&to->winstarts);
2561: PetscMalloc2(to->n,&request,to->n,&status);
2562: for (i=0; i<to->n; i++) {
2563: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2564: }
2565: for (i=0; i<from->n; i++) {
2566: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2567: }
2568: MPI_Waitall(to->n,request,status);
2569: PetscFree2(request,status);
2571: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2572: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2573: PetscMalloc1(from->n,&from->winstarts);
2574: PetscMalloc2(from->n,&request,from->n,&status);
2575: for (i=0; i<from->n; i++) {
2576: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2577: }
2578: for (i=0; i<to->n; i++) {
2579: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2580: }
2581: MPI_Waitall(from->n,request,status);
2582: PetscFree2(request,status);
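    /* after this exchange to->winstarts[i] holds the offset (in blocks) into
       to->procs[i]'s receive window at which this rank's contribution belongs;
       from->winstarts is the analogous information for the reverse scatter */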
2583: #endif
2584: } else {
2585: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2586: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2587: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2588: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2589: MPI_Request *rev_swaits,*rev_rwaits;
2590: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2592: /* allocate additional wait variables for the "reverse" scatter */
2593: PetscMalloc1(to->n,&rev_rwaits);
2594: PetscMalloc1(from->n,&rev_swaits);
2595: to->rev_requests = rev_rwaits;
2596: from->rev_requests = rev_swaits;
2598: /* Register the receives that you will use later (sends for scatter reverse) */
2599: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2600: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2601: if (use_rsend) {
2602: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2603: to->use_readyreceiver = PETSC_TRUE;
2604: from->use_readyreceiver = PETSC_TRUE;
2605: } else {
2606: to->use_readyreceiver = PETSC_FALSE;
2607: from->use_readyreceiver = PETSC_FALSE;
2608: }
2609: if (use_ssend) {
2610: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2611: }
2613: for (i=0; i<from->n; i++) {
2614: if (use_rsend) {
2615: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2616: } else if (use_ssend) {
2617: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2618: } else {
2619: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2620: }
2621: }
2623: for (i=0; i<to->n; i++) {
2624: if (use_rsend) {
2625: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2626: } else if (use_ssend) {
2627: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2628: } else {
2629: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2630: }
2631: }
2632: /* Register receives for scatter and reverse */
2633: for (i=0; i<from->n; i++) {
2634: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2635: }
2636: for (i=0; i<to->n; i++) {
2637: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2638: }
2639: if (use_rsend) {
2640: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2641: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2642: MPI_Barrier(comm);
2643: }
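    /* the *_init calls above create persistent requests that VecScatterBegin/
       VecScatterEnd later fire with MPI_Startall(); ready-send mode requires
       the matching receives to be pre-posted, hence the immediate start of
       the receives and the barrier above */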
2645: ctx->ops->copy = VecScatterCopy_PtoP_X;
2646: }
2647: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2649: #if defined(PETSC_USE_DEBUG)
2650: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2651: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2652: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2653: #endif
2655: switch (bs) {
2656: case 12:
2657: ctx->ops->begin = VecScatterBegin_12;
2658: ctx->ops->end = VecScatterEnd_12;
2659: break;
2660: case 11:
2661: ctx->ops->begin = VecScatterBegin_11;
2662: ctx->ops->end = VecScatterEnd_11;
2663: break;
2664: case 10:
2665: ctx->ops->begin = VecScatterBegin_10;
2666: ctx->ops->end = VecScatterEnd_10;
2667: break;
2668: case 9:
2669: ctx->ops->begin = VecScatterBegin_9;
2670: ctx->ops->end = VecScatterEnd_9;
2671: break;
2672: case 8:
2673: ctx->ops->begin = VecScatterBegin_8;
2674: ctx->ops->end = VecScatterEnd_8;
2675: break;
2676: case 7:
2677: ctx->ops->begin = VecScatterBegin_7;
2678: ctx->ops->end = VecScatterEnd_7;
2679: break;
2680: case 6:
2681: ctx->ops->begin = VecScatterBegin_6;
2682: ctx->ops->end = VecScatterEnd_6;
2683: break;
2684: case 5:
2685: ctx->ops->begin = VecScatterBegin_5;
2686: ctx->ops->end = VecScatterEnd_5;
2687: break;
2688: case 4:
2689: ctx->ops->begin = VecScatterBegin_4;
2690: ctx->ops->end = VecScatterEnd_4;
2691: break;
2692: case 3:
2693: ctx->ops->begin = VecScatterBegin_3;
2694: ctx->ops->end = VecScatterEnd_3;
2695: break;
2696: case 2:
2697: ctx->ops->begin = VecScatterBegin_2;
2698: ctx->ops->end = VecScatterEnd_2;
2699: break;
2700: case 1:
2701: ctx->ops->begin = VecScatterBegin_1;
2702: ctx->ops->end = VecScatterEnd_1;
2703: break;
2704: default:
2705: ctx->ops->begin = VecScatterBegin_bs;
2706: ctx->ops->end = VecScatterEnd_bs;
2708: }
2709: ctx->ops->view = VecScatterView_MPI;
2710: /* Check if the local scatter is actually a copy; important special case */
2711: if (to->local.n) {
2712: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2713: }
2714: return(0);
2715: }
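/*
   Editorial summary: the runtime options consumed above are
   -vecscatter_reproduce, -vecscatter_sendfirst, -vecscatter_alltoall,
   -vecscatter_nopack, -vecscatter_window, -vecscatter_rsend and
   -vecscatter_ssend; they select among MPI_Alltoallv/Alltoallw, one-sided
   window, and persistent point-to-point implementations of the same scatter.
*/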
2719: /* ------------------------------------------------------------------------------------*/
2720: /*
2721: Scatter from local Seq vectors to a parallel vector.
2722: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2723: reverses the result.
2724: */
2725: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2726: {
2727: PetscErrorCode ierr;
2728: MPI_Request *waits;
2729: VecScatter_MPI_General *to,*from;
2732: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2733: to = (VecScatter_MPI_General*)ctx->fromdata;
2734: from = (VecScatter_MPI_General*)ctx->todata;
2735: ctx->todata = (void*)to;
2736: ctx->fromdata = (void*)from;
2737: /* these two are special; they are ALWAYS stored in the to struct */
2738: to->sstatus = from->sstatus;
2739: to->rstatus = from->rstatus;
2741: from->sstatus = 0;
2742: from->rstatus = 0;
2744: waits = from->rev_requests;
2745: from->rev_requests = from->requests;
2746: from->requests = waits;
2747: waits = to->rev_requests;
2748: to->rev_requests = to->requests;
2749: to->requests = waits;
2750: return(0);
2751: }
2753: /* ---------------------------------------------------------------------------------*/
2754: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2755: {
2757: PetscMPIInt size,rank,tag,imdex,n;
2758: PetscInt *owners = xin->map->range;
2759: PetscMPIInt *nprocs = NULL;
2760: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2761: PetscMPIInt *owner = NULL;
2762: PetscInt *starts = NULL,count,slen;
2763: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2764: PetscMPIInt *onodes1,*olengths1,nrecvs;
2765: MPI_Comm comm;
2766: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2767: MPI_Status recv_status,*send_status = NULL;
2768: PetscBool duplicate = PETSC_FALSE;
2769: #if defined(PETSC_USE_DEBUG)
2770: PetscBool found = PETSC_FALSE;
2771: #endif
2774: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2775: PetscObjectGetComm((PetscObject)xin,&comm);
2776: MPI_Comm_size(comm,&size);
2777: MPI_Comm_rank(comm,&rank);
2778: if (size == 1) {
2779: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2780: return(0);
2781: }
2783: /*
2784: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2785: all processors then call VecScatterCreate_StoP()
2786: */
2787: /* first count number of contributors to each processor */
2788: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2789: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2791: lastidx = -1;
2792: j = 0;
2793: for (i=0; i<nx; i++) {
2794: /* if indices are NOT locally sorted, need to start search at the beginning */
2795: if (lastidx > (idx = bs*inidx[i])) j = 0;
2796: lastidx = idx;
2797: for (; j<size; j++) {
2798: if (idx >= owners[j] && idx < owners[j+1]) {
2799: nprocs[j]++;
2800: owner[i] = j;
2801: #if defined(PETSC_USE_DEBUG)
2802: found = PETSC_TRUE;
2803: #endif
2804: break;
2805: }
2806: }
2807: #if defined(PETSC_USE_DEBUG)
2808: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2809: found = PETSC_FALSE;
2810: #endif
2811: }
2812: nsends = 0;
2813: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2815: /* inform other processors of number of messages and max length */
2816: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2817: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2818: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2819: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2821: /* post receives: */
2822: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2824: count = 0;
2825: for (i=0; i<nrecvs; i++) {
2826: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2827: count += olengths1[i];
2828: }
2829: PetscFree(onodes1);
2831: /* do sends:
2832: starts[i] gives the starting index in svalues for the entries going to
2833: the ith processor
2834: */
2835: starts[0]= 0;
2836: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2837: for (i=0; i<nx; i++) {
2838: svalues[2*starts[owner[i]]] = bs*inidx[i];
2839: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2840: }
2842: starts[0] = 0;
2843: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2844: count = 0;
2845: for (i=0; i<size; i++) {
2846: if (nprocs[i]) {
2847: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2848: count++;
2849: }
2850: }
2851: PetscFree3(nprocs,owner,starts);
2853: /* wait on receives */
2854: count = nrecvs;
2855: slen = 0;
2856: while (count) {
2857: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2858: /* unpack receives into our local space */
2859: MPI_Get_count(&recv_status,MPIU_INT,&n);
2860: slen += n/2;
2861: count--;
2862: }
2863: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2865: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2866: base = owners[rank];
2867: count = 0;
2868: rsvalues = rvalues;
2869: for (i=0; i<nrecvs; i++) {
2870: values = rsvalues;
2871: rsvalues += 2*olengths1[i];
2872: for (j=0; j<olengths1[i]; j++) {
2873: local_inidx[count] = values[2*j] - base;
2874: local_inidy[count++] = values[2*j+1];
2875: }
2876: }
2877: PetscFree(olengths1);
2879: /* wait on sends */
2880: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2881: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2883: /*
2884: should sort and remove duplicates from local_inidx,local_inidy; the slow path below is compiled out unless do_it_slow is defined
2885: */
2887: #if defined(do_it_slow)
2888: /* sort on the from index */
2889: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2890: PetscInt start = 0,last; /* start and last were undeclared in the original dead code; declared here so the block compiles if ever enabled */
2891: while (start < slen) {
2892: count = start+1;
2893: last = local_inidx[start];
2894: while (count < slen && last == local_inidx[count]) count++;
2895: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2896: /* sort on to index */
2897: PetscSortInt(count-start,local_inidy+start);
2898: }
2899: /* remove duplicates; not most efficient way, but probably good enough */
2900: i = start;
2901: while (i < count-1) {
2902: if (local_inidy[i] != local_inidy[i+1]) i++;
2903: else { /* found a duplicate */
2904: duplicate = PETSC_TRUE;
2905: for (j=i; j<slen-1; j++) {
2906: local_inidx[j] = local_inidx[j+1];
2907: local_inidy[j] = local_inidy[j+1];
2908: }
2909: slen--;
2910: count--;
2911: }
2912: }
2913: start = count;
2914: }
2915: #endif
2916: if (duplicate) {
2917: PetscInfo(ctx,"Duplicate from/to indices passed in VecScatterCreate(); they are ignored\n");
2918: }
2919: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2920: PetscFree2(local_inidx,local_inidy);
2921: return(0);
2922: }
2924: /*@
2925: PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential to distributed
2927: Input Parameters:
2928: + comm - The communicator
2929: - gv - A distributed Vec
2930: Output Parameters:
2931: . sf - The SF created mapping a sequential Vec to gv
2933: Level: developer
2935: .seealso: DMPlexDistributedToSequential()
2936: @*/
2937: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
2938: {
2939: PetscSFNode *remotenodes;
2940: PetscInt *localnodes;
2941: PetscInt N, n, start, numroots, l;
2942: PetscMPIInt rank;
2946: PetscSFCreate(comm, sf);
2947: VecGetSize(gv, &N);
2948: VecGetLocalSize(gv, &n);
2949: VecGetOwnershipRange(gv, &start, NULL);
2950: MPI_Comm_rank(comm, &rank);
2951: PetscMalloc1(n, &localnodes);
2952: PetscMalloc1(n, &remotenodes);
2953: if (!rank) numroots = N;
2954: else numroots = 0;
2955: for (l = 0; l < n; ++l) {
2956: localnodes[l] = l;
2957: remotenodes[l].rank = 0;
2958: remotenodes[l].index = l+start;
2959: }
2960: PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
2961: return(0);
2962: }
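/*
   Usage sketch (illustrative; lv, comm and rank are assumptions of this
   example, with lv a sequential Vec of global size N on rank 0): push the
   entries of lv into the distributed vector gv through the SF built above.

     PetscSF            sf;
     const PetscScalar *larray = NULL;
     PetscScalar       *garray;

     PetscSFCreateFromZero(comm,gv,&sf);
     if (!rank) {VecGetArrayRead(lv,&larray);}
     VecGetArray(gv,&garray);
     PetscSFBcastBegin(sf,MPIU_SCALAR,larray,garray);
     PetscSFBcastEnd(sf,MPIU_SCALAR,larray,garray);
     VecRestoreArray(gv,&garray);
     if (!rank) {VecRestoreArrayRead(lv,&larray);}
     PetscSFDestroy(&sf);
*/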