Actual source code: vpscat.h
petsc-3.4.2 2013-07-02
/*
     Defines the methods VecScatterBegin/End_1,2,...
       This is included by vpscat.c with different values for BS

       This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
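
/*
   For illustration (assuming the including vpscat.c defines, say, BS 2 before
   including this file), the preprocessor expands

       PETSCMAP1(VecScatterBegin) -> PETSCMAP1_b(VecScatterBegin,2)
                                  -> PETSCMAP1_a(VecScatterBegin,2)
                                  -> VecScatterBegin_2

   so each inclusion generates a block-size-specific VecScatterBegin_BS /
   VecScatterEnd_BS pair.
*/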

PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
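
  /*
     For a forward scatter the "to" side describes the sends and the "from"
     side the receives; SCATTER_REVERSE swaps the roles of ctx->todata and
     ctx->fromdata and uses the pre-built reverse-mode request sets.
  */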
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_CUSP)
#if defined(PETSC_HAVE_TXPETSCGPU)
#if 0
  /* This branch messages the entire vector */
  VecGetArrayRead(xin,(const PetscScalar**)&xv);
#else
  /*
     This branch messages only the parts that are necessary.
     ... this seems to perform about the same due to the necessity of calling
     a separate kernel before the SpMV for gathering data into
     a contiguous buffer.  We leave both branches in for the time being;
     I expect that ultimately this branch will be the right choice, however
     the jury is still out.
  */
  if (xin->valid_GPU_array == PETSC_CUSP_GPU) {
    if (xin->spptr && ctx->spptr) {
      VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
    }
  }
#endif
  xv = *(PetscScalar**)xin->data;
#else
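  /*
     Without TXPETSCGPU: when the vector data currently lives on the GPU, the
     vector is large, there is no purely local piece, and only a small
     fraction of the entries are needed for the sends, copy just those entries
     back to the host.  The block indices are sorted and de-duplicated, then
     expanded by the block size, and the resulting PetscCUSPIndices object is
     cached in ctx->spptr so the setup cost is paid only once.
  */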
  if (!xin->map->n || ((xin->map->n > 10000) && (sstarts[nsends]*bs < 0.05*xin->map->n) && (xin->valid_GPU_array == PETSC_CUSP_GPU) && !(to->local.n))) {
    if (!ctx->spptr) {
      PetscInt k,*tindices,n = sstarts[nsends],*sindices;
      PetscMalloc(n*sizeof(PetscInt),&tindices);
      PetscMemcpy(tindices,to->indices,n*sizeof(PetscInt));
      PetscSortRemoveDupsInt(&n,tindices);
      PetscMalloc(bs*n*sizeof(PetscInt),&sindices);
      for (i=0; i<n; i++) {
        for (k=0; k<bs; k++) sindices[i*bs+k] = tindices[i]+k;
      }
      PetscFree(tindices);
      PetscCUSPIndicesCreate(n*bs,sindices,n*bs,sindices,(PetscCUSPIndices*)&ctx->spptr);
      PetscFree(sindices);
    }
    VecCUSPCopyFromGPUSome_Public(xin,(PetscCUSPIndices)ctx->spptr);
    xv = *(PetscScalar**)xin->data;
  } else {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#endif
#else
  VecGetArrayRead(xin,(const PetscScalar**)&xv);
#endif

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
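
    /*
       Three send strategies follow: a single MPI_Alltoallw when alltoallw is
       enabled and addv is INSERT_VALUES; packing everything into one buffer
       for -vecscatter_packtogether, MPI_Alltoallv, or one-sided window
       communication; or the default path that packs and starts one
       (persistent) send per neighboring process.
    */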
#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them; used when -vecscatter_packtogether is set or when alltoallv/window communication is selected */
      PETSCMAP1(Pack)(sstarts[nsends],indices,xv,svalues);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
#endif
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n],nsends,swaits);
      }
    } else {
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i]);
        MPI_Start_isend(sstarts[i+1]-sstarts[i],swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
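  /*
     If the local piece is a single contiguous block (to->local.is_copy) and
     values are simply inserted, one PetscMemcpy suffices; otherwise the
     generic indexed Scatter kernel is applied entry by entry.
  */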
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
    } else {
      PETSCMAP1(Scatter)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv);
    }
  }
#if defined(PETSC_HAVE_CUSP)
  if (xin->valid_GPU_array != PETSC_CUSP_GPU) {
    VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  } else {
    VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  }
#else
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
#endif
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEnd)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;   /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;
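
  /*
     Two unpack strategies: when everything was packed and sent together (or
     alltoallv / one-sided windows were used), wait for all receives and
     unpack the whole buffer at once; otherwise wait for the receives one at a
     time and unpack each as it completes.  With ctx->reproduce set
     (-vecscatter_reproduce) the receives are processed in a fixed order so
     that ADD_VALUES results do not depend on message arrival order.
  */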
  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE)
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack)(from->starts[from->n],from->values,indices,yv,addv);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv);
      count--;
    }
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
#if defined(PETSC_HAVE_TXPETSCGPU)
  if (yin->valid_GPU_array == PETSC_CUSP_CPU) {
    if (yin->spptr && ctx->spptr) {
      VecCUSPCopyToGPUSome_Public(yin,(PetscCUSPIndices)ctx->spptr);
    }
  }
#endif
  return(0);
}
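
/*
   A sketch of typical caller-side usage of the routines generated from this
   header (x, y, isfrom, isto are assumed to have been created elsewhere); the
   Begin/End split lets the communication started above overlap with local
   computation:

       VecScatter ctx;
       VecScatterCreate(x,isfrom,y,isto,&ctx);
       VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
       ...independent local work...
       VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterDestroy(&ctx);
*/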

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS