Actual source code: seqvscat.c
petsc-3.14.1 2020-11-03
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/vecscatterimpl.h>
10: #if defined(PETSC_HAVE_CUDA)
11: #include <petsc/private/cudavecimpl.h>
12: #endif
14: /*
15: Checks if any indices are less than zero and generates an error
16: */
17: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
18: {
19: PetscInt i;
22: if (!PetscDefined(USE_DEBUG)) return(0);
23: for (i=0; i<n; i++) {
24: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
25: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
26: }
27: return(0);
28: }
30: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
31: {
35: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
36: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
37: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
38: PetscFree2(ctx->todata,ctx->fromdata);
39: return(0);
40: }
42: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
43: {
47: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
48: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);
49: PetscFree2(ctx->todata,ctx->fromdata);
50: return(0);
51: }
53: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
54: {
58: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
59: VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);
60: PetscFree2(ctx->todata,ctx->fromdata);
61: return(0);
62: }
64: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
65: {
69: PetscFree2(ctx->todata,ctx->fromdata);
70: return(0);
71: }
73: /* --------------------------------------------------------------------------------------*/
74: /*
75: Scatter: sequential general to sequential general
76: */
77: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
78: {
79: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
80: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
81: PetscErrorCode ierr;
82: PetscInt i,n = gen_from->n,*fslots,*tslots;
83: PetscScalar *xv,*yv;
84: #if defined(PETSC_HAVE_CUDA)
85: PetscBool is_veccuda,isy_veccuda;
86: #endif
89: #if defined(PETSC_HAVE_CUDA)
90: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
91: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
92: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
93: /* create the scatter indices if not done already */
94: if (!ctx->spptr) {
95: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
96: fslots = gen_from->vslots;
97: tslots = gen_to->vslots;
98: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
99: }
100: /* next do the scatter */
101: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
102: return(0);
103: }
104: #endif
106: VecGetArrayPair(x,y,&xv,&yv);
107: if (mode & SCATTER_REVERSE) {
108: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
109: gen_from = (VecScatter_Seq_General*)ctx->todata;
110: }
111: fslots = gen_from->vslots;
112: tslots = gen_to->vslots;
114: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
115: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]]; }
116: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
117: #if !defined(PETSC_USE_COMPLEX)
118: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
119: #endif
120: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121: VecRestoreArrayPair(x,y,&xv,&yv);
122: return(0);
123: }
125: /*
126: Scatter: sequential general to sequential stride 1
127: */
128: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
129: {
130: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
131: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
132: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
133: PetscErrorCode ierr;
134: PetscInt first = gen_to->first;
135: PetscScalar *xv,*yv;
136: #if defined(PETSC_HAVE_CUDA)
137: PetscBool is_veccuda,isy_veccuda;
138: #endif
141: #if defined(PETSC_HAVE_CUDA)
142: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
143: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
144: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
145: /* create the scatter indices if not done already */
146: if (!ctx->spptr) {
147: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
148: PetscInt *tslots = 0;
149: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
150: }
151: /* next do the scatter */
152: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
153: return(0);
154: }
155: #endif
157: VecGetArrayPair(x,y,&xv,&yv);
158: if (mode & SCATTER_REVERSE) {
159: PetscScalar *xxv = xv + first;
160: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
161: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = xxv[i]; }
162: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
163: #if !defined(PETSC_USE_COMPLEX)
164: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xxv[i]); }
165: #endif
166: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
167: } else {
168: PetscScalar *yyv = yv + first;
169: if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
170: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[fslots[i]]; }
171: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
172: #if !defined(PETSC_USE_COMPLEX)
173: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[fslots[i]]); }
174: #endif
175: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
176: }
177: VecRestoreArrayPair(x,y,&xv,&yv);
178: return(0);
179: }
181: /*
182: Scatter: sequential general to sequential stride
183: */
184: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
185: {
186: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
187: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
188: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
189: PetscErrorCode ierr;
190: PetscInt first = gen_to->first,step = gen_to->step;
191: PetscScalar *xv,*yv;
192: #if defined(PETSC_HAVE_CUDA)
193: PetscBool is_veccuda,isy_veccuda;
194: #endif
197: #if defined(PETSC_HAVE_CUDA)
198: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
199: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
200: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
201: /* create the scatter indices if not done already */
202: if (!ctx->spptr) {
203: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
204: PetscInt * tslots = 0;
205: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
206: }
207: /* next do the scatter */
208: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
209: return(0);
210: }
211: #endif
213: VecGetArrayPair(x,y,&xv,&yv);
214: if (mode & SCATTER_REVERSE) {
215: if (addv == INSERT_VALUES) {
216: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
217: } else if (addv == ADD_VALUES) {
218: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
219: #if !defined(PETSC_USE_COMPLEX)
220: } else if (addv == MAX_VALUES) {
221: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
222: #endif
223: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
224: } else {
225: if (addv == INSERT_VALUES) {
226: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
227: } else if (addv == ADD_VALUES) {
228: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
229: #if !defined(PETSC_USE_COMPLEX)
230: } else if (addv == MAX_VALUES) {
231: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
232: #endif
233: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
234: }
235: VecRestoreArrayPair(x,y,&xv,&yv);
236: return(0);
237: }
239: /*
240: Scatter: sequential stride 1 to sequential general
241: */
242: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
243: {
244: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
245: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
246: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
247: PetscErrorCode ierr;
248: PetscInt first = gen_from->first;
249: PetscScalar *xv,*yv;
250: #if defined(PETSC_HAVE_CUDA)
251: PetscBool is_veccuda,isy_veccuda;
252: #endif
255: #if defined(PETSC_HAVE_CUDA)
256: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
257: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
258: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
259: /* create the scatter indices if not done already */
260: if (!ctx->spptr) {
261: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
262: PetscInt *fslots = 0;
263: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
264: }
265: /* next do the scatter */
266: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
267: return(0);
268: }
269: #endif
271: VecGetArrayPair(x,y,&xv,&yv);
272: if (mode & SCATTER_REVERSE) {
273: PetscScalar *yyv = yv + first;
274: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
275: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i] = xv[tslots[i]]; }
276: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
277: #if !defined(PETSC_USE_COMPLEX)
278: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xv[tslots[i]]); }
279: #endif
280: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
281: } else {
282: PetscScalar *xxv = xv + first;
283: if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
284: else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = xxv[i]; }
285: else if (addv == ADD_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
286: #if !defined(PETSC_USE_COMPLEX)
287: else if (addv == MAX_VALUES) { for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xxv[i]); }
288: #endif
289: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
290: }
291: VecRestoreArrayPair(x,y,&xv,&yv);
292: return(0);
293: }
295: /*
296: Scatter: sequential stride to sequential general
297: */
298: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
299: {
300: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
301: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
302: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
303: PetscErrorCode ierr;
304: PetscInt first = gen_from->first,step = gen_from->step;
305: PetscScalar *xv,*yv;
306: #if defined(PETSC_HAVE_CUDA)
307: PetscBool is_veccuda,isy_veccuda;
308: #endif
311: #if defined(PETSC_HAVE_CUDA)
312: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
313: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
314: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
315: /* create the scatter indices if not done already */
316: if (!ctx->spptr) {
317: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
318: PetscInt *fslots = 0;
319: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
320: }
321: /* next do the scatter */
322: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
323: return(0);
324: }
325: #endif
327: VecGetArrayPair(x,y,&xv,&yv);
328: if (mode & SCATTER_REVERSE) {
329: if (addv == INSERT_VALUES) {
330: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
331: } else if (addv == ADD_VALUES) {
332: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
333: #if !defined(PETSC_USE_COMPLEX)
334: } else if (addv == MAX_VALUES) {
335: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
336: #endif
337: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
338: } else {
339: if (addv == INSERT_VALUES) {
340: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
341: } else if (addv == ADD_VALUES) {
342: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
343: #if !defined(PETSC_USE_COMPLEX)
344: } else if (addv == MAX_VALUES) {
345: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
346: #endif
347: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
348: }
349: VecRestoreArrayPair(x,y,&xv,&yv);
350: return(0);
351: }
353: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
354: {
355: PetscErrorCode ierr;
356: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
357: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
358: PetscInt i;
359: PetscBool isascii;
362: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
363: if (isascii) {
364: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
365: for (i=0; i<in_to->n; i++) {
366: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
367: }
368: if (in_to->memcpy_plan.optimized[0]) {
369: PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
370: }
371: }
372: return(0);
373: }
374: /*
375: Scatter: sequential stride to sequential stride
376: */
377: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
378: {
379: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
380: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
381: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
382: PetscErrorCode ierr;
383: PetscInt from_first = gen_from->first,from_step = gen_from->step;
384: PetscScalar *xv,*yv;
385: #if defined(PETSC_HAVE_CUDA)
386: PetscBool is_veccuda,isy_veccuda;
387: #endif
390: #if defined(PETSC_HAVE_CUDA)
391: PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
392: PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
393: if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
394: /* create the scatter indices if not done already */
395: if (!ctx->spptr) {
396: PetscInt *tslots = 0,*fslots = 0;
397: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
398: }
399: /* next do the scatter */
400: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
401: return(0);
402: }
403: #endif
405: VecGetArrayPair(x,y,&xv,&yv);
406: if (mode & SCATTER_REVERSE) {
407: from_first = gen_to->first;
408: to_first = gen_from->first;
409: from_step = gen_to->step;
410: to_step = gen_from->step;
411: }
413: if (addv == INSERT_VALUES) {
414: if (to_step == 1 && from_step == 1) {
415: PetscArraycpy(yv+to_first,xv+from_first,n);
416: } else {
417: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
418: }
419: } else if (addv == ADD_VALUES) {
420: if (to_step == 1 && from_step == 1) {
421: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
422: for (i=0; i<n; i++) yyv[i] += xxv[i];
423: } else {
424: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
425: }
426: #if !defined(PETSC_USE_COMPLEX)
427: } else if (addv == MAX_VALUES) {
428: if (to_step == 1 && from_step == 1) {
429: PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
430: for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
431: } else {
432: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
433: }
434: #endif
435: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
436: VecRestoreArrayPair(x,y,&xv,&yv);
437: return(0);
438: }
440: /* --------------------------------------------------------------------------*/
443: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
444: {
445: PetscErrorCode ierr;
446: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
447: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
450: out->ops->begin = in->ops->begin;
451: out->ops->end = in->ops->end;
452: out->ops->copy = in->ops->copy;
453: out->ops->destroy = in->ops->destroy;
454: out->ops->view = in->ops->view;
456: PetscMalloc2(1,&out_to,1,&out_from);
457: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
458: out_to->n = in_to->n;
459: out_to->format = in_to->format;
460: out_to->nonmatching_computed = PETSC_FALSE;
461: out_to->slots_nonmatching = NULL;
462: PetscArraycpy(out_to->vslots,in_to->vslots,out_to->n);
463: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
465: out_from->n = in_from->n;
466: out_from->format = in_from->format;
467: out_from->nonmatching_computed = PETSC_FALSE;
468: out_from->slots_nonmatching = NULL;
469: PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);
470: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
472: out->todata = (void*)out_to;
473: out->fromdata = (void*)out_from;
474: return(0);
475: }
477: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
478: {
479: PetscErrorCode ierr;
480: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
481: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
482: PetscInt i;
483: PetscBool isascii;
486: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
487: if (isascii) {
488: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
489: for (i=0; i<in_to->n; i++) {
490: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
491: }
492: if (in_from->memcpy_plan.optimized[0]) {
493: PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
494: }
495: }
496: return(0);
497: }
500: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
501: {
502: PetscErrorCode ierr;
503: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
504: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
507: out->ops->begin = in->ops->begin;
508: out->ops->end = in->ops->end;
509: out->ops->copy = in->ops->copy;
510: out->ops->destroy = in->ops->destroy;
511: out->ops->view = in->ops->view;
513: PetscMalloc2(1,&out_to,1,&out_from);
514: PetscMalloc1(in_from->n,&out_from->vslots);
515: out_to->n = in_to->n;
516: out_to->format = in_to->format;
517: out_to->first = in_to->first;
518: out_to->step = in_to->step;
519: out_to->format = in_to->format;
521: out_from->n = in_from->n;
522: out_from->format = in_from->format;
523: out_from->nonmatching_computed = PETSC_FALSE;
524: out_from->slots_nonmatching = NULL;
525: PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);
526: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
528: out->todata = (void*)out_to;
529: out->fromdata = (void*)out_from;
530: return(0);
531: }
533: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
534: {
535: PetscErrorCode ierr;
536: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
537: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
538: PetscInt i;
539: PetscBool isascii;
542: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
543: if (isascii) {
544: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
545: for (i=0; i<in_to->n; i++) {
546: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
547: }
548: if (in_from->memcpy_plan.optimized[0]) {
549: PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
550: }
551: }
552: return(0);
553: }
555: /* --------------------------------------------------------------------------*/
556: /*
557: Scatter: parallel to sequential vector, sequential strides for both.
558: */
559: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
560: {
561: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
562: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
563: PetscErrorCode ierr;
566: out->ops->begin = in->ops->begin;
567: out->ops->end = in->ops->end;
568: out->ops->copy = in->ops->copy;
569: out->ops->destroy = in->ops->destroy;
570: out->ops->view = in->ops->view;
572: PetscMalloc2(1,&out_to,1,&out_from);
573: out_to->n = in_to->n;
574: out_to->format = in_to->format;
575: out_to->first = in_to->first;
576: out_to->step = in_to->step;
577: out_to->format = in_to->format;
578: out_from->n = in_from->n;
579: out_from->format= in_from->format;
580: out_from->first = in_from->first;
581: out_from->step = in_from->step;
582: out_from->format= in_from->format;
583: out->todata = (void*)out_to;
584: out->fromdata = (void*)out_from;
585: return(0);
586: }
588: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
589: {
590: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
591: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
592: PetscErrorCode ierr;
593: PetscBool isascii;
596: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
597: if (isascii) {
598: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
599: }
600: return(0);
601: }
603: /* --------------------------------------------------------------------------------------*/
604: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
605: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
606: {
607: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
608: PetscInt j,n_copies;
610: PetscBool same_copy_starts;
613: PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
614: PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
615: to->memcpy_plan.n = 1;
616: from->memcpy_plan.n = 1;
618: /* malloc and init the two fields to false and zero */
619: PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
620: PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);
622: /* count number of copies, which runs from 1 to n */
623: n_copies = 1;
624: for (i=0; i<n-1; i++) {
625: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
626: }
628: /* if average copy size >= 256 bytes, use memcpy instead of load/store */
629: if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
630: PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
631: PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);
633: /* set up copy_starts[] & copy_lenghts[] of to and from */
634: to->memcpy_plan.copy_starts[0] = to_slots[0];
635: from->memcpy_plan.copy_starts[0] = from_slots[0];
637: if (n_copies != 1) { /* one copy is trival and we can save some work */
638: j = 0; /* j-th copy */
639: for (i=0; i<n-1; i++) {
640: if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
641: to->memcpy_plan.copy_lengths[j] = to_slots[i]+bs-to->memcpy_plan.copy_starts[j];
642: from->memcpy_plan.copy_lengths[j] = from_slots[i]+bs-from->memcpy_plan.copy_starts[j];
643: to->memcpy_plan.copy_starts[j+1] = to_slots[i+1];
644: from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
645: j++;
646: }
647: }
648: }
650: /* set up copy_lengths[] of the last copy */
651: to->memcpy_plan.copy_lengths[n_copies-1] = to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1];
652: from->memcpy_plan.copy_lengths[n_copies-1] = from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1];
654: /* check if to and from have the same copy_starts[] values */
655: same_copy_starts = PETSC_TRUE;
656: for (i=0; i<n_copies; i++) {
657: if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
658: }
660: to->memcpy_plan.optimized[0] = PETSC_TRUE;
661: from->memcpy_plan.optimized[0] = PETSC_TRUE;
662: to->memcpy_plan.copy_offsets[1] = n_copies;
663: from->memcpy_plan.copy_offsets[1] = n_copies;
664: to->memcpy_plan.same_copy_starts = same_copy_starts;
665: from->memcpy_plan.same_copy_starts = same_copy_starts;
666: }
668: /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
669: return(0);
670: }
672: /* -------------------------------------------------- */
673: PetscErrorCode VecScatterSetUp_Seq(VecScatter ctx)
674: {
675: PetscErrorCode ierr;
676: PetscInt ix_type=-1,iy_type=-1;
677: IS tix = NULL,tiy = NULL,ix=ctx->from_is,iy=ctx->to_is;
680: GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
681: if (tix) ix = tix;
682: if (tiy) iy = tiy;
684: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
685: PetscInt nx,ny;
686: const PetscInt *idx,*idy;
687: VecScatter_Seq_General *to = NULL,*from = NULL;
689: ISGetLocalSize(ix,&nx);
690: ISGetLocalSize(iy,&ny);
691: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
692: ISGetIndices(ix,&idx);
693: ISGetIndices(iy,&idy);
694: PetscMalloc2(1,&to,1,&from);
695: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
696: to->n = nx;
697: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
698: PetscArraycpy(to->vslots,idy,nx);
699: from->n = nx;
700: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
701: PetscArraycpy(from->vslots,idx,nx);
702: to->format = VEC_SCATTER_SEQ_GENERAL;
703: from->format = VEC_SCATTER_SEQ_GENERAL;
704: ctx->todata = (void*)to;
705: ctx->fromdata = (void*)from;
706: VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
707: ctx->ops->begin = VecScatterBegin_SGToSG;
708: ctx->ops->end = NULL;
709: ctx->ops->destroy = VecScatterDestroy_SGToSG;
710: ctx->ops->copy = VecScatterCopy_SGToSG;
711: ctx->ops->view = VecScatterView_SGToSG;
712: PetscInfo(ctx->from_v,"Special case: sequential vector general scatter\n");
713: goto functionend;
714: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
715: PetscInt nx,ny,to_first,to_step,from_first,from_step;
716: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
718: ISGetLocalSize(ix,&nx);
719: ISGetLocalSize(iy,&ny);
720: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
721: ISStrideGetInfo(iy,&to_first,&to_step);
722: ISStrideGetInfo(ix,&from_first,&from_step);
723: PetscMalloc2(1,&to8,1,&from8);
724: to8->n = nx;
725: to8->first = to_first;
726: to8->step = to_step;
727: from8->n = nx;
728: from8->first = from_first;
729: from8->step = from_step;
730: to8->format = VEC_SCATTER_SEQ_STRIDE;
731: from8->format = VEC_SCATTER_SEQ_STRIDE;
732: ctx->todata = (void*)to8;
733: ctx->fromdata = (void*)from8;
734: ctx->ops->begin = VecScatterBegin_SSToSS;
735: ctx->ops->end = NULL;
736: ctx->ops->destroy = VecScatterDestroy_SSToSS;
737: ctx->ops->copy = VecScatterCopy_SSToSS;
738: ctx->ops->view = VecScatterView_SSToSS;
739: PetscInfo(ctx->from_v,"Special case: sequential vector stride to stride\n");
740: goto functionend;
741: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
742: PetscInt nx,ny,first,step;
743: const PetscInt *idx;
744: VecScatter_Seq_General *from9 = NULL;
745: VecScatter_Seq_Stride *to9 = NULL;
747: ISGetLocalSize(ix,&nx);
748: ISGetIndices(ix,&idx);
749: ISGetLocalSize(iy,&ny);
750: ISStrideGetInfo(iy,&first,&step);
751: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
752: PetscMalloc2(1,&to9,1,&from9);
753: PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
754: PetscMalloc1(nx,&from9->vslots);
755: to9->n = nx;
756: to9->first = first;
757: to9->step = step;
758: from9->n = nx;
759: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
760: PetscArraycpy(from9->vslots,idx,nx);
761: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
762: if (step == 1) {
763: PetscInt tmp[2];
764: tmp[0] = 0; tmp[1] = nx;
765: VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
766: ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
767: } else {
768: ctx->ops->begin = VecScatterBegin_SGToSS;
769: }
770: ctx->ops->destroy = VecScatterDestroy_SGToSS;
771: ctx->ops->end = NULL;
772: ctx->ops->copy = VecScatterCopy_SGToSS;
773: ctx->ops->view = VecScatterView_SGToSS;
774: to9->format = VEC_SCATTER_SEQ_STRIDE;
775: from9->format = VEC_SCATTER_SEQ_GENERAL;
776: PetscInfo(ctx->from_v,"Special case: sequential vector general to stride\n");
777: goto functionend;
778: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
779: PetscInt nx,ny,first,step;
780: const PetscInt *idy;
781: VecScatter_Seq_General *to10 = NULL;
782: VecScatter_Seq_Stride *from10 = NULL;
784: ISGetLocalSize(ix,&nx);
785: ISGetIndices(iy,&idy);
786: ISGetLocalSize(iy,&ny);
787: ISStrideGetInfo(ix,&first,&step);
788: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
789: PetscMalloc2(1,&to10,1,&from10);
790: PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
791: PetscMalloc1(nx,&to10->vslots);
792: from10->n = nx;
793: from10->first = first;
794: from10->step = step;
795: to10->n = nx;
796: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
797: PetscArraycpy(to10->vslots,idy,nx);
798: ctx->todata = (void*)to10;
799: ctx->fromdata = (void*)from10;
800: if (step == 1) {
801: PetscInt tmp[2];
802: tmp[0] = 0; tmp[1] = nx;
803: VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
804: ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
805: } else {
806: ctx->ops->begin = VecScatterBegin_SSToSG;
807: }
808: ctx->ops->destroy = VecScatterDestroy_SSToSG;
809: ctx->ops->end = NULL;
810: ctx->ops->copy = NULL;
811: ctx->ops->view = VecScatterView_SSToSG;
812: to10->format = VEC_SCATTER_SEQ_GENERAL;
813: from10->format = VEC_SCATTER_SEQ_STRIDE;
814: PetscInfo(ctx->from_v,"Special case: sequential vector stride to general\n");
815: goto functionend;
816: } else {
817: PetscInt nx,ny;
818: const PetscInt *idx,*idy;
819: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
820: PetscBool idnx,idny;
822: ISGetLocalSize(ix,&nx);
823: ISGetLocalSize(iy,&ny);
824: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
826: ISIdentity(ix,&idnx);
827: ISIdentity(iy,&idny);
828: if (idnx && idny) {
829: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
830: PetscMalloc2(1,&to13,1,&from13);
831: to13->n = nx;
832: to13->first = 0;
833: to13->step = 1;
834: from13->n = nx;
835: from13->first = 0;
836: from13->step = 1;
837: to13->format = VEC_SCATTER_SEQ_STRIDE;
838: from13->format = VEC_SCATTER_SEQ_STRIDE;
839: ctx->todata = (void*)to13;
840: ctx->fromdata = (void*)from13;
841: ctx->ops->begin = VecScatterBegin_SSToSS;
842: ctx->ops->end = NULL;
843: ctx->ops->destroy = VecScatterDestroy_SSToSS;
844: ctx->ops->copy = VecScatterCopy_SSToSS;
845: ctx->ops->view = VecScatterView_SSToSS;
846: PetscInfo(ctx->from_v,"Special case: sequential copy\n");
847: goto functionend;
848: }
850: ISGetIndices(iy,&idy);
851: ISGetIndices(ix,&idx);
852: PetscMalloc2(1,&to11,1,&from11);
853: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
854: to11->n = nx;
855: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
856: PetscArraycpy(to11->vslots,idy,nx);
857: from11->n = nx;
858: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
859: PetscArraycpy(from11->vslots,idx,nx);
860: to11->format = VEC_SCATTER_SEQ_GENERAL;
861: from11->format = VEC_SCATTER_SEQ_GENERAL;
862: ctx->todata = (void*)to11;
863: ctx->fromdata = (void*)from11;
864: ctx->ops->begin = VecScatterBegin_SGToSG;
865: ctx->ops->end = NULL;
866: ctx->ops->destroy = VecScatterDestroy_SGToSG;
867: ctx->ops->copy = VecScatterCopy_SGToSG;
868: ctx->ops->view = VecScatterView_SGToSG;
869: VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
870: ISRestoreIndices(ix,&idx);
871: ISRestoreIndices(iy,&idy);
872: PetscInfo(ctx->from_v,"Sequential vector scatter with block indices\n");
873: goto functionend;
874: }
875: functionend:
876: ISDestroy(&tix);
877: ISDestroy(&tiy);
878: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
879: return(0);
880: }
882: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
883: {
884: PetscErrorCode ierr;
887: ctx->ops->setup = VecScatterSetUp_Seq;
888: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
889: return(0);
890: }