Actual source code: vpscat.h
petsc-3.8.3 2017-12-09
/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
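/*
   For example, when vpscat.c defines BS as 2 before including this file, the token
   pasting above turns PETSCMAP1(VecScatterBegin) into VecScatterBegin_2, so each
   inclusion generates a block-size-specific copy of the routines below.
*/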
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;

  PetscFunctionBegin;
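  /*
     For a reverse scatter the send ("to") and receive ("from") plans trade places, so the
     same precomputed indices and persistent MPI requests move the data in the opposite direction.
  */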
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_CUSP)
  ierr = VecCUSPAllocateCheckHost(xin);CHKERRQ(ierr);
  if (xin->valid_GPU_array == PETSC_CUSP_GPU) {
    if (xin->spptr && ctx->spptr) {
      ierr = VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);CHKERRQ(ierr);
    } else {
      ierr = VecCUSPCopyFromGPU(xin);CHKERRQ(ierr);
    }
  }
  xv = *((PetscScalar**)xin->data);
#elif defined(PETSC_HAVE_VECCUDA)
  ierr = VecCUDAAllocateCheckHost(xin);CHKERRQ(ierr);
  if (xin->valid_GPU_array == PETSC_CUDA_GPU) {
    if (xin->spptr && ctx->spptr) {
      ierr = VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);CHKERRQ(ierr);
    } else {
      ierr = VecCUDACopyFromGPU(xin);CHKERRQ(ierr);
    }
  }
  xv = *((PetscScalar**)xin->data);
#else
  ierr = VecGetArrayRead(xin,(const PetscScalar**)&xv);CHKERRQ(ierr);
#endif
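  /*
     If the vector's current data lives on the GPU it is brought to the host first, copying only
     the entries this scatter needs when index information is available; the packing below always
     reads from a host array.
  */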
  if (xin != yin) {ierr = VecGetArray(yin,&yv);CHKERRQ(ierr);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {ierr = MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);CHKERRQ(ierr);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      ierr = MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends, when -vecscatter_packtogether used */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        ierr = MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
      } else if (to->use_window) {
        PetscInt cnt;

        ierr = MPI_Win_fence(0,from->window);CHKERRQ(ierr);
        for (i=0; i<nsends; i++) {
          cnt  = bs*(to->starts[i+1]-to->starts[i]);
          ierr = MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);CHKERRQ(ierr);
        }
#endif
      } else if (nsends) {
        ierr = MPI_Startall_isend(to->starts[to->n],nsends,swaits);CHKERRQ(ierr);
      }
    } else {
      /* this version packs and sends one at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        ierr = MPI_Start_isend(sstarts[i+1]-sstarts[i],swaits+i);CHKERRQ(ierr);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {ierr = MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);CHKERRQ(ierr);}
    }
  }
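  /* the sends and receives started above are completed in the matching PETSCMAP1(VecScatterEnd) */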
  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        ierr = PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);CHKERRQ(ierr);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  ierr = VecRestoreArrayRead(xin,(const PetscScalar**)&xv);CHKERRQ(ierr);
  if (xin != yin) {ierr = VecRestoreArray(yin,&yv);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
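/*
   For orientation, a minimal caller-level sketch using the standard PETSc API (the
   vectors x,y and index sets ix,iy are placeholders, not taken from this file):

     VecScatter sctx;
     ierr = VecScatterCreate(x,ix,y,iy,&sctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterDestroy(&sctx);CHKERRQ(ierr);

   The MPI scatter implementation dispatches those calls to the block-size-specific
   routines generated from this header.
*/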
/* --------------------------------------------------------------------------------------*/
PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  PetscFunctionBegin;
  if (mode & SCATTER_LOCAL) PetscFunctionReturn(0);
  ierr = VecGetArray(yin,&yv);CHKERRQ(ierr);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;   /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;
  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
    if (to->use_window) {ierr = MPI_Win_fence(0,from->window);CHKERRQ(ierr);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {ierr = MPI_Waitall(nrecvs,rwaits,rstatus);CHKERRQ(ierr);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
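    /*
       With -vecscatter_reproduce the receives are drained in a fixed order (last request first)
       so that ADD_VALUES accumulation is bitwise reproducible; otherwise whichever message
       arrives first is unpacked first.
    */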
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        ierr  = MPI_Wait(rwaits+imdex,&xrstatus);CHKERRQ(ierr);
      } else {
        ierr = MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);CHKERRQ(ierr);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {ierr = MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);CHKERRQ(ierr);}
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
  }
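  /*
     In ready-receiver mode the receives for the next scatter are posted immediately, and the
     barrier guarantees every process has them posted before any matching ready-mode send starts.
  */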
  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = VecRestoreArray(yin,&yv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
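/*
   BS is undefined above so that vpscat.c can instantiate this header repeatedly, roughly along
   these lines (a sketch only; see vpscat.c for the exact block sizes and include path used):

     #define BS 1
     #include <../src/vec/vec/utils/vpscat.h>
     #define BS 2
     #include <../src/vec/vec/utils/vpscat.h>
*/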