Actual source code: vpscat_mpi1.h
petsc-3.10.2 2018-10-09
/*
     Defines the methods VecScatterBegin/End_1,2,...
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
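
/* For example, if the including file defines BS as 2 before including this header,
   PETSCMAP1(VecScatterBeginMPI1) expands to VecScatterBeginMPI1_2, so each inclusion
   with a different BS generates a routine specialized for that block size. */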

PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_VECCUDA)
  PetscBool              is_cudatype = PETSC_FALSE;
#endif
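
  /* A reverse scatter runs the same communication pattern backwards: the roles of the
     send ("to") and receive ("from") halves of the context are swapped and the
     precomputed reverse MPI requests are used. */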
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
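
  /* If xin is a CUDA vector whose current data lives only on the GPU, copy the needed
     entries (or, lacking scatter-specific GPU indices, the whole vector) back to the
     host before packing the send buffers. */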
#if defined(PETSC_HAVE_VECCUDA)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
  if (is_cudatype) {
    VecCUDAAllocateCheckHost(xin);
    if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  } else
#endif
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
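
  /* For an in-place scatter (xin == yin) the destination array is simply the source array. */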
  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    /* post receives since they were not previously posted */
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}

    /* this version packs and sends one at a time */
    for (i=0; i<nsends; i++) {
      if (to->memcpy_plan.optimized[i]) { /* use memcpy instead of individual load/store */
        VecScatterMemcpyPlanExecute_Pack(i,xv,&to->memcpy_plan,svalues+bs*sstarts[i],INSERT_VALUES,bs);
      } else {
        PETSCMAP1(Pack_MPI1)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
      }
      MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
    }
  }
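
  /* The nonblocking receives and sends started above are completed in the matching
     PETSCMAP1(VecScatterEndMPI1) routine below. */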
  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.memcpy_plan.optimized[0] && addv == INSERT_VALUES) {
      /* do the copy only when it is not a self-to-self copy */
      if (!(xv == yv && to->local.memcpy_plan.same_copy_starts)) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv); }
    } else if (to->local.memcpy_plan.optimized[0]) {
      VecScatterMemcpyPlanExecute_Scatter(0,xv,&to->local.memcpy_plan,yv,&from->local.memcpy_plan,addv);
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter_MPI1)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter_MPI1)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEndMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;
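
  /* Receives were posted by the corresponding VecScatterBeginMPI1 call; MPI_Waitany
     processes them in arrival order rather than in posting order. */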
  /* unpack one at a time */
  count = nrecvs;
  while (count) {
    MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
    /* unpack receives into our local space */
    if (from->memcpy_plan.optimized[imdex]) {
      VecScatterMemcpyPlanExecute_Unpack(imdex,rvalues+bs*rstarts[imdex],yv,&from->memcpy_plan,addv,bs);
    } else {
      PETSCMAP1(UnPack_MPI1)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
    }
    count--;
  }

  /* wait on sends */
  if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS