Actual source code: cudavecimpl.h

petsc-3.7.1 2016-05-15
Report Typos and Errors
4: #if defined(__CUDACC__) 6: #include <petsccuda.h> 7: #include <petsc/private/vecimpl.h> 9: #include <cublas_v2.h> 11: #define WaitForGPU() PetscCUDASynchronize ? cudaThreadSynchronize() : 0 13: struct Vec_CUDA { 14: PetscScalar *GPUarray; /* this always holds the GPU data */ 15: cudaStream_t stream; /* A stream for doing asynchronous data transfers */ 16: PetscBool hostDataRegisteredAsPageLocked; 17: }; 19: #endif 21: #include <cuda_runtime.h> 23: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUDA(Vec,Vec,PetscScalar*, PetscScalar*); 24: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUDA(Vec,Vec,Vec); 25: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUDA(Vec,PetscScalar,Vec,Vec); 26: PETSC_INTERN PetscErrorCode VecMDot_SeqCUDA(Vec,PetscInt,const Vec[],PetscScalar*); 27: PETSC_INTERN PetscErrorCode VecSet_SeqCUDA(Vec,PetscScalar); 28: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUDA(Vec,PetscInt,const PetscScalar*,Vec*); 29: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUDA(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec); 30: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUDA(Vec,Vec,Vec); 31: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUDA(Vec,const PetscScalar*); 32: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUDA(Vec); 33: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUDA(Vec,const PetscScalar*); 34: PETSC_INTERN PetscErrorCode VecDot_SeqCUDA(Vec,Vec,PetscScalar*); 35: PETSC_INTERN PetscErrorCode VecTDot_SeqCUDA(Vec,Vec,PetscScalar*); 36: PETSC_INTERN PetscErrorCode VecScale_SeqCUDA(Vec,PetscScalar); 37: PETSC_EXTERN PetscErrorCode VecCopy_SeqCUDA(Vec,Vec); 38: PETSC_INTERN PetscErrorCode VecSwap_SeqCUDA(Vec,Vec); 39: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUDA(Vec,PetscScalar,Vec); 40: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUDA(Vec,PetscScalar,PetscScalar,Vec); 41: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUDA(Vec,Vec*); 42: PETSC_INTERN PetscErrorCode VecConjugate_SeqCUDA(Vec xin); 43: PETSC_INTERN PetscErrorCode VecNorm_SeqCUDA(Vec,NormType,PetscReal*); 44: PETSC_INTERN PetscErrorCode VecCUDACopyToGPU(Vec); 45: PETSC_INTERN PetscErrorCode VecCUDAAllocateCheck(Vec); 46: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUDA(Vec); 47: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUDA(Vec); 48: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUDA(Vec,PetscScalar,Vec); 49: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUDA(Vec,PetscRandom); 50: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUDA(Vec,Vec); 51: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUDA(Vec,Vec); 52: PETSC_INTERN PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin); 53: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r); 54: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v); 55: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin); 56: PETSC_INTERN PetscErrorCode VecCUDACopyToGPU_Public(Vec); 57: PETSC_INTERN PetscErrorCode VecCUDAAllocateCheck_Public(Vec); 58: PETSC_INTERN PetscErrorCode VecCUDACopyToGPUSome(Vec v, PetscCUDAIndices ci); 59: PETSC_INTERN PetscErrorCode VecCUDACopyFromGPUSome(Vec v, PetscCUDAIndices ci); 61: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUDAIndices*); 62: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*); 63: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*); 64: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode); 66: typedef enum {VEC_SCATTER_CUDA_STOS, VEC_SCATTER_CUDA_PTOP} VecCUDAScatterType; 67: typedef enum {VEC_SCATTER_CUDA_GENERAL, VEC_SCATTER_CUDA_STRIDED} VecCUDASequentialScatterMode; 69: struct _p_VecScatterCUDAIndices_PtoP { 70: PetscInt ns; 71: PetscInt sendLowestIndex; 72: PetscInt nr; 73: PetscInt recvLowestIndex; 74: }; 76: struct _p_VecScatterCUDAIndices_StoS { 77: /* from indices data */ 78: PetscInt *fslots; 79: PetscInt fromFirst; 80: PetscInt fromStep; 81: VecCUDASequentialScatterMode fromMode; 83: /* to indices data */ 84: PetscInt *tslots; 85: PetscInt toFirst; 86: PetscInt toStep; 87: VecCUDASequentialScatterMode toMode; 89: PetscInt n; 90: PetscInt MAX_BLOCKS; 91: PetscInt MAX_CORESIDENT_THREADS; 92: cudaStream_t stream; 93: }; 95: struct _p_PetscCUDAIndices { 96: void * scatter; 97: VecCUDAScatterType scatterType; 98: }; 100: /* complex single */ 101: #if defined(PETSC_USE_COMPLEX) 102: #if defined(PETSC_USE_REAL_SINGLE) 103: #define cublasXaxpy(a,b,c,d,e,f,g) cublasCaxpy((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e),(cuComplex*)(f),(g)) 104: #define cublasXscal(a,b,c,d,e) cublasCscal((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e)) 105: #define cublasXdotu(a,b,c,d,e,f,g) cublasCdotu((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f),(cuComplex*)(g)) 106: #define cublasXdot(a,b,c,d,e,f,g) cublasCdotc((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f),(cuComplex*)(g)) 107: #define cublasXswap(a,b,c,d,e,f) cublasCswap((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f)) 108: #define cublasXnrm2(a,b,c,d,e) cublasScnrm2((a),(b),(cuComplex*)(c),(d),(e)) 109: #define cublasIXamax(a,b,c,d,e) cublasIcamax((a),(b),(cuComplex*)(c),(d),(e)) 110: #define cublasXasum(a,b,c,d,e) cublasScasum((a),(b),(cuComplex*)(c),(d),(e)) 111: #else /* complex double */ 112: #define cublasXaxpy(a,b,c,d,e,f,g) cublasZaxpy((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e),(cuDoubleComplex*)(f),(g)) 113: #define cublasXscal(a,b,c,d,e) cublasZscal((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e)) 114: #define cublasXdotu(a,b,c,d,e,f,g) cublasZdotu((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f),(cuDoubleComplex*)(g)) 115: #define cublasXdot(a,b,c,d,e,f,g) cublasZdotc((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f),(cuDoubleComplex*)(g)) 116: #define cublasXswap(a,b,c,d,e,f) cublasZswap((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f)) 117: #define cublasXnrm2(a,b,c,d,e) cublasDznrm2((a),(b),(cuDoubleComplex*)(c),(d),(e)) 118: #define cublasIXamax(a,b,c,d,e) cublasIzamax((a),(b),(cuDoubleComplex*)(c),(d),(e)) 119: #define cublasXasum(a,b,c,d,e) cublasDzasum((a),(b),(cuDoubleComplex*)(c),(d),(e)) 120: #endif 121: #else /* real single */ 122: #if defined(PETSC_USE_REAL_SINGLE) 123: #define cublasXaxpy cublasSaxpy 124: #define cublasXscal cublasSscal 125: #define cublasXdotu cublasSdot 126: #define cublasXdot cublasSdot 127: #define cublasXswap cublasSswap 128: #define cublasXnrm2 cublasSnrm2 129: #define cublasIXamax cublasIsamax 130: #define cublasXasum cublasSasum 131: #else /* real double */ 132: #define cublasXaxpy cublasDaxpy 133: #define cublasXscal cublasDscal 134: #define cublasXdotu cublasDdot 135: #define cublasXdot cublasDdot 136: #define cublasXswap cublasDswap 137: #define cublasXnrm2 cublasDnrm2 138: #define cublasIXamax cublasIdamax 139: #define cublasXasum cublasDasum 140: #endif 141: #endif 143: #endif