Actual source code: cuspvecimpl.h

petsc-3.6.4 2016-04-12
Report Typos and Errors
4: #include <petsccusp.h> 5: #include <petsc/private/vecimpl.h> 7: #include <algorithm> 8: #include <vector> 9: #include <string> 11: #include <cublas.h> 12: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500 13: #include <cusp/blas/blas.h> 14: #else 15: #include <cusp/blas.h> 16: #endif 17: #include <thrust/host_vector.h> 18: #include <thrust/device_vector.h> 19: #include <thrust/iterator/constant_iterator.h> 20: #include <thrust/transform.h> 21: #include <thrust/iterator/permutation_iterator.h> 23: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory> 24: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory> 25: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory> 26: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory> 28: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*); 29: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec); 30: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec); 31: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*); 32: PETSC_INTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar); 33: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*); 34: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec); 35: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec); 36: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*); 37: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec); 38: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*); 39: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*); 40: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*); 41: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar); 42: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec); 43: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec); 44: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec); 45: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec); 46: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*); 47: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*); 48: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec); 49: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec); 50: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheckHost(Vec); 51: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec); 52: PETSC_INTERN PetscErrorCode VecView_Seq(Vec,PetscViewer); 53: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec); 54: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec); 55: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom); 56: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec); 57: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec); 59: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec); 60: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec); 62: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err) 64: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0]) 66: #define WaitForGPU() PetscCUSPSynchronize ? cudaThreadSynchronize() : 0 68: struct Vec_CUSP { 69: CUSPARRAY *GPUarray; /* this always holds the GPU data */ 70: cudaStream_t stream; /* A stream for doing asynchronous data transfers */ 71: PetscBool hostDataRegisteredAsPageLocked; 72: }; 74: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*); 75: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*); 76: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*); 77: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode); 79: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType; 80: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode; 82: struct _p_VecScatterCUSPIndices_PtoP { 83: PetscInt ns; 84: PetscInt sendLowestIndex; 85: PetscInt nr; 86: PetscInt recvLowestIndex; 87: }; 89: struct _p_VecScatterCUSPIndices_StoS { 90: /* from indices data */ 91: PetscInt *fslots; 92: PetscInt fromFirst; 93: PetscInt fromStep; 94: VecCUSPSequentialScatterMode fromMode; 96: /* to indices data */ 97: PetscInt *tslots; 98: PetscInt toFirst; 99: PetscInt toStep; 100: VecCUSPSequentialScatterMode toMode; 102: PetscInt n; 103: PetscInt MAX_BLOCKS; 104: PetscInt MAX_CORESIDENT_THREADS; 105: cudaStream_t stream; 106: }; 108: struct _p_PetscCUSPIndices { 109: void * scatter; 110: VecCUSPScatterType scatterType; 111: }; 113: #endif