Actual source code: vpscat_mpi1.h
petsc-3.9.2 2018-05-20
/*
     Defines the methods VecScatterBegin/End_1,2,......
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b) a ## _ ## b
#define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b)
#define PETSCMAP1(a)     PETSCMAP1_b(a,BS)
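
/*
   Illustrative sketch (an assumption about the including .c file, not part of
   this header): the "template" is instantiated by defining BS and then
   including this file once per block size, e.g.

       #define BS 1
       #include "vpscat_mpi1.h"

       #define BS 2
       #include "vpscat_mpi1.h"

   so that PETSCMAP1(VecScatterBeginMPI1) expands to VecScatterBeginMPI1_1,
   VecScatterBeginMPI1_2, and so on.  The #undef BS at the bottom of this file
   is what allows the next block size to be defined before the next inclusion.
*/
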
PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_viennacltype = PETSC_FALSE;
#endif
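
  /* For a reverse scatter the two sides trade roles: the pattern built for
     receiving drives the sends (and vice versa), and the persistent requests
     set up for the reverse direction (rev_requests) are used. */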
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
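  /* Obtain a host pointer xv to the entries of xin; for GPU vector types the
     needed entries are first made valid on the host (for CUDA, possibly only
     the subset described by the scatter's GPU indices). */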
#if defined(PETSC_HAVE_VIENNACL)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_viennacltype,VECSEQVIENNACL,VECMPIVIENNACL,VECVIENNACL,"");
  if (is_viennacltype) {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  } else
#endif
#if defined(PETSC_HAVE_VECCUDA)
  {
    VecCUDAAllocateCheckHost(xin);
    if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  }
#else
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#endif
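
  /* Write access to yin is needed when scattering into a different vector;
     when xin == yin, yv simply aliases the same array. */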
  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
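
    /* Off-process values move in one of three ways: a single collective
       (MPI_Alltoallw straight from xv, or pack everything and MPI_Alltoallv),
       one-sided MPI_Put into a window, or packed point-to-point sends
       (started all at once, or one message at a time). */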
#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together before sending; used with -vecscatter_packtogether, all-to-all, or window scatters */
      PETSCMAP1(Pack_MPI1)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
#if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
#endif
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      /* this version packs and sends one message at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack_MPI1)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter_MPI1)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter_MPI1)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/
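
/* Completes the communication started by PETSCMAP1(VecScatterBeginMPI1): waits
   for the incoming messages, unpacks them into yin according to addv, and then
   waits on the outgoing sends so their buffers may be reused. */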
PetscErrorCode PETSCMAP1(VecScatterEndMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;   /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;
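
  /* Either wait for all the receives at once and unpack them in a single sweep
     (packed-together, all-to-all, and window variants), or unpack each message
     as soon as it arrives. */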
  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack_MPI1)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack_MPI1)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
  }
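  /* Ready-receiver mode: repost the receives for the next scatter and
     synchronize, since ready-mode sends are only legal once the matching
     receives have been posted. */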
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS