Actual source code: cuspvecimpl.h
petsc-3.7.2 2016-06-05
4: #if defined(__CUDACC__)
6: #include <petsccusp.h>
7: #include <petsc/private/vecimpl.h>
9: #include <algorithm>
10: #include <vector>
11: #include <string>
13: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
14: #include <cusp/blas/blas.h>
15: #else
16: #include <cusp/blas.h>
17: #endif
18: #include <thrust/host_vector.h>
19: #include <thrust/device_vector.h>
20: #include <thrust/iterator/constant_iterator.h>
21: #include <thrust/transform.h>
22: #include <thrust/iterator/permutation_iterator.h>
24: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
25: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory>
26: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
27: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
28: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
29: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])
30: #define WaitForGPU() PetscCUSPSynchronize ? cudaThreadSynchronize() : 0
32: struct Vec_CUSP {
33: CUSPARRAY *GPUarray; /* this always holds the GPU data */
34: cudaStream_t stream; /* A stream for doing asynchronous data transfers */
35: PetscBool hostDataRegisteredAsPageLocked;
36: };
38: #endif
41: #include <cuda_runtime.h>
43: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*);
44: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
45: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
46: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*);
47: PETSC_EXTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
48: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*);
49: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
50: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
51: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*);
52: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec);
53: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*);
54: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*);
55: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*);
56: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
57: PETSC_EXTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
58: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
59: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
60: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
61: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*);
62: PETSC_INTERN PetscErrorCode VecConjugate_SeqCUSP(Vec xin);
63: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
64: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec);
65: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec);
66: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec);
67: PETSC_EXTERN PetscErrorCode VecView_Seq(Vec,PetscViewer);
68: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec);
69: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
70: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
71: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec);
72: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec);
73: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP_Private(Vec xin,Vec yin);
74: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP_Private(Vec xin,PetscRandom r);
75: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP_Private(Vec v);
76: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP_Private(Vec vin);
77: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
78: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec);
79: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPUSome(Vec v, PetscCUSPIndices ci);
80: PETSC_INTERN PetscErrorCode VecCUSPCopyFromGPUSome(Vec v, PetscCUSPIndices ci);
83: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);
84: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
85: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
86: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
88: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType;
89: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode;
91: struct _p_VecScatterCUSPIndices_PtoP {
92: PetscInt ns;
93: PetscInt sendLowestIndex;
94: PetscInt nr;
95: PetscInt recvLowestIndex;
96: };
98: struct _p_VecScatterCUSPIndices_StoS {
99: /* from indices data */
100: PetscInt *fslots;
101: PetscInt fromFirst;
102: PetscInt fromStep;
103: VecCUSPSequentialScatterMode fromMode;
105: /* to indices data */
106: PetscInt *tslots;
107: PetscInt toFirst;
108: PetscInt toStep;
109: VecCUSPSequentialScatterMode toMode;
111: PetscInt n;
112: PetscInt MAX_BLOCKS;
113: PetscInt MAX_CORESIDENT_THREADS;
114: cudaStream_t stream;
115: };
117: struct _p_PetscCUSPIndices {
118: void * scatter;
119: VecCUSPScatterType scatterType;
120: };
122: #endif