Actual source code: vpscat_mpi1.h

petsc-3.9.1 2018-04-29

/*
     Defines the methods VecScatterBegin/End_1,2,...
     This is included by vpscat.c with different values for BS

     This is a terrible way of doing "templates" in C.
*/
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)
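/* For example, if the including source file defines BS as 2 (an illustrative value),
   PETSCMAP1(VecScatterBeginMPI1) expands to VecScatterBeginMPI1_2, so each block size
   gets its own concrete, fully typed function. */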

PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *xv,*yv,*svalues;
  MPI_Request            *rwaits,*swaits;
  PetscErrorCode         ierr;
  PetscInt               i,*indices,*sstarts,nrecvs,nsends,bs;
#if defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_viennacltype = PETSC_FALSE;
#endif
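  /* A reverse scatter exchanges the roles of the send (to) and receive (from) halves
     and uses the pre-built reverse requests. */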

  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  } else {
    to     = (VecScatter_MPI_General*)ctx->todata;
    from   = (VecScatter_MPI_General*)ctx->fromdata;
    rwaits = from->requests;
    swaits = to->requests;
  }
  bs      = to->bs;
  svalues = to->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = to->indices;
  sstarts = to->starts;
#if defined(PETSC_HAVE_VIENNACL)
  PetscObjectTypeCompareAny((PetscObject)xin,&is_viennacltype,VECSEQVIENNACL,VECMPIVIENNACL,VECVIENNACL,"");
  if (is_viennacltype) {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  } else
#endif
#if defined(PETSC_HAVE_VECCUDA)
  {
    VecCUDAAllocateCheckHost(xin);
    if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
      if (xin->spptr && ctx->spptr) {
        VecCUDACopyFromGPUSome_Public(xin,(PetscCUDAIndices)ctx->spptr);
      } else {
        VecCUDACopyFromGPU(xin);
      }
    }
    xv = *((PetscScalar**)xin->data);
  }
#else
  {
    VecGetArrayRead(xin,(const PetscScalar**)&xv);
  }
#endif
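  /* At this point xv points to host-accessible data for xin; for CUDA vectors the needed
     entries (or the whole vector) were copied from the GPU above. */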

  if (xin != yin) {VecGetArray(yin,&yv);}
  else yv = xv;

  if (!(mode & SCATTER_LOCAL)) {
    if (!from->use_readyreceiver && !to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }

#if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
    if (to->use_alltoallw && addv == INSERT_VALUES) {
      MPI_Alltoallw(xv,to->wcounts,to->wdispls,to->types,yv,from->wcounts,from->wdispls,from->types,PetscObjectComm((PetscObject)ctx));
    } else
#endif
    if (ctx->packtogether || to->use_alltoallv || to->use_window) {
      /* this version packs all the messages together and sends them at once, e.g. when -vecscatter_packtogether is used */
      PETSCMAP1(Pack_MPI1)(sstarts[nsends],indices,xv,svalues,bs);
      if (to->use_alltoallv) {
        MPI_Alltoallv(to->values,to->counts,to->displs,MPIU_SCALAR,from->values,from->counts,from->displs,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
#if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
      } else if (to->use_window) {
        PetscInt cnt;

        MPI_Win_fence(0,from->window);
        for (i=0; i<nsends; i++) {
          cnt  = bs*(to->starts[i+1]-to->starts[i]);
          MPI_Put(to->values+bs*to->starts[i],cnt,MPIU_SCALAR,to->procs[i],bs*to->winstarts[i],cnt,MPIU_SCALAR,from->window);
        }
#endif
      } else if (nsends) {
        MPI_Startall_isend(to->starts[to->n]*bs,nsends,swaits);
      }
    } else {
      /* this version packs and sends one at a time */
      for (i=0; i<nsends; i++) {
        PETSCMAP1(Pack_MPI1)(sstarts[i+1]-sstarts[i],indices + sstarts[i],xv,svalues + bs*sstarts[i],bs);
        MPI_Start_isend((sstarts[i+1]-sstarts[i])*bs,swaits+i);
      }
    }

    if (!from->use_readyreceiver && to->sendfirst && !to->use_alltoallv && !to->use_window) {
      /* post receives since they were not previously posted */
      if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    }
  }

  /* take care of local scatters */
  if (to->local.n) {
    if (to->local.is_copy && addv == INSERT_VALUES) {
      if (yv != xv || from->local.copy_start != to->local.copy_start) {
        PetscMemcpy(yv + from->local.copy_start,xv + to->local.copy_start,to->local.copy_length);
      }
    } else {
      if (xv == yv && addv == INSERT_VALUES && to->local.nonmatching_computed) {
        /* only copy entries that do not share identical memory locations */
        PETSCMAP1(Scatter_MPI1)(to->local.n_nonmatching,to->local.slots_nonmatching,xv,from->local.slots_nonmatching,yv,addv,bs);
      } else {
        PETSCMAP1(Scatter_MPI1)(to->local.n,to->local.vslots,xv,from->local.vslots,yv,addv,bs);
      }
    }
  }
  VecRestoreArrayRead(xin,(const PetscScalar**)&xv);
  if (xin != yin) {VecRestoreArray(yin,&yv);}
  return(0);
}
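/* Note: callers never invoke these block-size-specific routines directly; they are installed
   as the scatter context's begin/end function pointers and reached through the generic
   interface. A typical (illustrative) call sequence from user code looks like:

       VecScatterCreate(x,ix,y,iy,&scat);
       VecScatterBegin(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(scat,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterDestroy(&scat);
*/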

/* --------------------------------------------------------------------------------------*/

PetscErrorCode PETSCMAP1(VecScatterEndMPI1)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  VecScatter_MPI_General *to,*from;
  PetscScalar            *rvalues,*yv;
  PetscErrorCode         ierr;
  PetscInt               nrecvs,nsends,*indices,count,*rstarts,bs;
  PetscMPIInt            imdex;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             xrstatus,*rstatus,*sstatus;

  if (mode & SCATTER_LOCAL) return(0);
  VecGetArray(yin,&yv);
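  /* yv is the local array of the destination vector; received entries are unpacked into it below */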

  to      = (VecScatter_MPI_General*)ctx->todata;
  from    = (VecScatter_MPI_General*)ctx->fromdata;
  rwaits  = from->requests;
  swaits  = to->requests;
  sstatus = to->sstatus;    /* sstatus and rstatus are always stored in to */
  rstatus = to->rstatus;
  if (mode & SCATTER_REVERSE) {
    to     = (VecScatter_MPI_General*)ctx->fromdata;
    from   = (VecScatter_MPI_General*)ctx->todata;
    rwaits = from->rev_requests;
    swaits = to->rev_requests;
  }
  bs      = from->bs;
  rvalues = from->values;
  nrecvs  = from->n;
  nsends  = to->n;
  indices = from->indices;
  rstarts = from->starts;

  if (ctx->packtogether || (to->use_alltoallw && (addv != INSERT_VALUES)) || (to->use_alltoallv && !to->use_alltoallw) || to->use_window) {
#if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
    if (to->use_window) {MPI_Win_fence(0,from->window);}
    else
#endif
    if (nrecvs && !to->use_alltoallv) {MPI_Waitall(nrecvs,rwaits,rstatus);}
    PETSCMAP1(UnPack_MPI1)(from->starts[from->n],from->values,indices,yv,addv,bs);
  } else if (!to->use_alltoallw) {
    /* unpack one at a time */
    count = nrecvs;
    while (count) {
      if (ctx->reproduce) {
        imdex = count - 1;
        MPI_Wait(rwaits+imdex,&xrstatus);
      } else {
        MPI_Waitany(nrecvs,rwaits,&imdex,&xrstatus);
      }
      /* unpack receives into our local space */
      PETSCMAP1(UnPack_MPI1)(rstarts[imdex+1] - rstarts[imdex],rvalues + bs*rstarts[imdex],indices + rstarts[imdex],yv,addv,bs);
      count--;
    }
  }
  if (from->use_readyreceiver) {
    if (nrecvs) {MPI_Startall_irecv(from->starts[nrecvs]*bs,nrecvs,rwaits);}
    MPI_Barrier(PetscObjectComm((PetscObject)ctx));
  }

  /* wait on sends */
  if (nsends && !to->use_alltoallv && !to->use_window) {MPI_Waitall(nsends,swaits,sstatus);}
  VecRestoreArray(yin,&yv);
  return(0);
}

#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
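The #undef of BS at the end is what lets the including translation unit instantiate this header repeatedly, once per block size. A minimal sketch of that inclusion pattern is shown below; the specific block sizes and the include path are illustrative assumptions, not necessarily the exact set used by vpscat.c:

    /* illustrative instantiation pattern (block sizes and path are assumptions) */
    #define BS 1
    #include "vpscat_mpi1.h"   /* defines VecScatterBeginMPI1_1 / VecScatterEndMPI1_1 */

    #define BS 2
    #include "vpscat_mpi1.h"   /* defines VecScatterBeginMPI1_2 / VecScatterEndMPI1_2 */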