Actual source code: veccuda.c
petsc-3.8.3 2017-12-09
1: /*
2: Implementation of the sequential cuda vectors.
4: This file contains the code that can be compiled with a C
5: compiler. The companion file veccuda2.cu contains the code that
6: must be compiled with nvcc or a C++ compiler.
7: */
9: #define PETSC_SKIP_SPINLOCK
11: #include <petscconf.h>
12: #include <petsc/private/vecimpl.h> /*I <petscvec.h> I*/
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
16: /*
17: Allocates space for the vector array on the Host if it does not exist.
18: Does NOT change the PetscCUDAFlag for the vector
19: Does NOT zero the CUDA array
20: */
21: PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
22: {
24: PetscScalar *array;
25: Vec_Seq *s = (Vec_Seq*)v->data;
26: PetscInt n = v->map->n;
29: if (!s) {
30: PetscNewLog((PetscObject)v,&s);
31: v->data = s;
32: }
33: if (!s->array) {
34: PetscMalloc1(n,&array);
35: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
36: s->array = array;
37: s->array_allocated = array;
38: if (v->valid_GPU_array == PETSC_CUDA_UNALLOCATED) {
39: v->valid_GPU_array = PETSC_CUDA_CPU;
40: }
41: }
42: return(0);
43: }
45: PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
46: {
47: PetscScalar *ya;
48: const PetscScalar *xa;
49: PetscErrorCode ierr;
52: VecCUDAAllocateCheckHost(xin);
53: VecCUDAAllocateCheckHost(yin);
54: if (xin != yin) {
55: VecGetArrayRead(xin,&xa);
56: VecGetArray(yin,&ya);
57: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
58: VecRestoreArrayRead(xin,&xa);
59: VecRestoreArray(yin,&ya);
60: }
61: return(0);
62: }
64: PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
65: {
67: PetscInt n = xin->map->n,i;
68: PetscScalar *xx;
71: VecGetArray(xin,&xx);
72: for (i=0; i<n; i++) { PetscRandomGetValue(r,&xx[i]); }
73: VecRestoreArray(xin,&xx);
74: return(0);
75: }
77: PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
78: {
79: Vec_Seq *vs = (Vec_Seq*)v->data;
83: PetscObjectSAWsViewOff(v);
84: #if defined(PETSC_USE_LOG)
85: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
86: #endif
87: if (vs) {
88: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
89: PetscFree(vs);
90: }
91: return(0);
92: }
94: PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
95: {
96: Vec_Seq *v = (Vec_Seq*)vin->data;
99: v->array = v->unplacedarray;
100: v->unplacedarray = 0;
101: return(0);
102: }
104: PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
105: {
109: VecCUDAAllocateCheck(v);
110: return(0);
111: }
113: PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
114: {
118: VecCUDACopyToGPU(v);
119: return(0);
120: }
122: /*
123: VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
125: Input Parameters:
126: . v - the vector
127: . indices - the requested indices, this should be created with CUDAIndicesCreate()
129: */
130: PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci)
131: {
135: VecCUDACopyToGPUSome(v,ci);
136: return(0);
137: }
139: /*
140: VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
142: Input Parameters:
143: + v - the vector
144: - indices - the requested indices, this should be created with CUDAIndicesCreate()
145: */
146: PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci)
147: {
151: VecCUDACopyFromGPUSome(v,ci);
152: return(0);
153: }
155: PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
156: {
160: VecSetRandom_SeqCUDA_Private(xin,r);
161: xin->valid_GPU_array = PETSC_CUDA_CPU;
162: return(0);
163: }
165: PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
166: {
170: VecCUDACopyFromGPU(vin);
171: VecResetArray_SeqCUDA_Private(vin);
172: vin->valid_GPU_array = PETSC_CUDA_CPU;
173: return(0);
174: }
176: PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
177: {
181: VecCUDACopyFromGPU(vin);
182: VecPlaceArray_Seq(vin,a);
183: vin->valid_GPU_array = PETSC_CUDA_CPU;
184: return(0);
185: }
187: PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
188: {
192: VecCUDACopyFromGPU(vin);
193: VecReplaceArray_Seq(vin,a);
194: vin->valid_GPU_array = PETSC_CUDA_CPU;
195: return(0);
196: }
198: /*@
199: VecCreateSeqCUDA - Creates a standard, sequential array-style vector.
201: Collective on MPI_Comm
203: Input Parameter:
204: . comm - the communicator, should be PETSC_COMM_SELF
205: . n - the vector length
207: Output Parameter:
208: . V - the vector
210: Notes:
211: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
212: same type as an existing vector.
214: Level: intermediate
216: Concepts: vectors^creating sequential
218: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
219: @*/
220: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
221: {
225: VecCreate(comm,v);
226: VecSetSizes(*v,n,n);
227: VecSetType(*v,VECSEQCUDA);
228: return(0);
229: }
231: PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
232: {
236: VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);
237: PetscLayoutReference(win->map,&(*V)->map);
238: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
239: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
240: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
241: return(0);
242: }
244: PetscErrorCode VecCreate_SeqCUDA(Vec V)
245: {
249: PetscLayoutSetUp(V->map);
250: VecCUDAAllocateCheck(V);
251: V->valid_GPU_array = PETSC_CUDA_GPU;
252: VecCreate_SeqCUDA_Private(V,((Vec_CUDA*)V->spptr)->GPUarray_allocated);
253: VecSet(V,0.0);
254: return(0);
255: }
257: /*@C
258: VecCreateSeqCUDAWithArray - Creates a CUDA sequential array-style vector,
259: where the user provides the array space to store the vector values. The array
260: provided must be a GPU array.
262: Collective on MPI_Comm
264: Input Parameter:
265: + comm - the communicator, should be PETSC_COMM_SELF
266: . bs - the block size
267: . n - the vector length
268: - array - GPU memory where the vector elements are to be stored.
270: Output Parameter:
271: . V - the vector
273: Notes:
274: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
275: same type as an existing vector.
277: If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
278: at a later stage to SET the array for storing the vector values.
280: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
281: The user should not free the array until the vector is destroyed.
283: Level: intermediate
285: Concepts: vectors^creating with array
287: .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
288: VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
289: VecCreateMPIWithArray()
290: @*/
291: PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
292: {
294: PetscMPIInt size;
297: VecCreate(comm,V);
298: VecSetSizes(*V,n,n);
299: VecSetBlockSize(*V,bs);
300: MPI_Comm_size(comm,&size);
301: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
302: VecCreate_SeqCUDA_Private(*V,array);
303: return(0);
304: }
306: PetscErrorCode VecCreate_SeqCUDA_Private(Vec V,const PetscScalar *array)
307: {
309: cudaError_t err;
310: Vec_CUDA *veccuda;
311: PetscMPIInt size;
314: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
315: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
316: VecCreate_Seq_Private(V,0);
317: PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);
319: V->ops->dot = VecDot_SeqCUDA;
320: V->ops->norm = VecNorm_SeqCUDA;
321: V->ops->tdot = VecTDot_SeqCUDA;
322: V->ops->scale = VecScale_SeqCUDA;
323: V->ops->copy = VecCopy_SeqCUDA;
324: V->ops->set = VecSet_SeqCUDA;
325: V->ops->swap = VecSwap_SeqCUDA;
326: V->ops->axpy = VecAXPY_SeqCUDA;
327: V->ops->axpby = VecAXPBY_SeqCUDA;
328: V->ops->axpbypcz = VecAXPBYPCZ_SeqCUDA;
329: V->ops->pointwisemult = VecPointwiseMult_SeqCUDA;
330: V->ops->pointwisedivide = VecPointwiseDivide_SeqCUDA;
331: V->ops->setrandom = VecSetRandom_SeqCUDA;
332: V->ops->dot_local = VecDot_SeqCUDA;
333: V->ops->tdot_local = VecTDot_SeqCUDA;
334: V->ops->norm_local = VecNorm_SeqCUDA;
335: V->ops->mdot_local = VecMDot_SeqCUDA;
336: V->ops->maxpy = VecMAXPY_SeqCUDA;
337: V->ops->mdot = VecMDot_SeqCUDA;
338: V->ops->aypx = VecAYPX_SeqCUDA;
339: V->ops->waxpy = VecWAXPY_SeqCUDA;
340: V->ops->dotnorm2 = VecDotNorm2_SeqCUDA;
341: V->ops->placearray = VecPlaceArray_SeqCUDA;
342: V->ops->replacearray = VecReplaceArray_SeqCUDA;
343: V->ops->resetarray = VecResetArray_SeqCUDA;
344: V->ops->destroy = VecDestroy_SeqCUDA;
345: V->ops->duplicate = VecDuplicate_SeqCUDA;
346: V->ops->conjugate = VecConjugate_SeqCUDA;
347: V->ops->getlocalvector = VecGetLocalVector_SeqCUDA;
348: V->ops->restorelocalvector = VecRestoreLocalVector_SeqCUDA;
349: V->ops->getlocalvectorread = VecGetLocalVector_SeqCUDA;
350: V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
352: /* Later, functions check for the Vec_CUDA structure existence, so do not create it without array */
353: if (array) {
354: if (!V->spptr) {
355: PetscMalloc(sizeof(Vec_CUDA),&V->spptr);
356: veccuda = (Vec_CUDA*)V->spptr;
357: err = cudaStreamCreate(&veccuda->stream);CHKERRCUDA(err);
358: veccuda->GPUarray_allocated = 0;
359: veccuda->hostDataRegisteredAsPageLocked = PETSC_FALSE;
360: V->valid_GPU_array = PETSC_CUDA_UNALLOCATED;
361: }
362: veccuda = (Vec_CUDA*)V->spptr;
363: veccuda->GPUarray = (PetscScalar*)array;
364: }
365: return(0);
366: }