Actual source code: cuspvecimpl.h
petsc-3.6.4 2016-04-12
4: #include <petsccusp.h>
5: #include <petsc/private/vecimpl.h>
7: #include <algorithm>
8: #include <vector>
9: #include <string>
11: #include <cublas.h>
12: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
13: #include <cusp/blas/blas.h>
14: #else
15: #include <cusp/blas.h>
16: #endif
17: #include <thrust/host_vector.h>
18: #include <thrust/device_vector.h>
19: #include <thrust/iterator/constant_iterator.h>
20: #include <thrust/transform.h>
21: #include <thrust/iterator/permutation_iterator.h>
23: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory> /* shorthand: CUSP 1-D array of PetscScalar tagged cusp::device_memory (note: no GPU suffix, unlike CUSPINTARRAYGPU) */
24: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory> /* shorthand: CUSP 1-D array of PetscScalar tagged cusp::host_memory */
25: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory> /* shorthand: CUSP 1-D array of PetscInt tagged cusp::device_memory */
26: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory> /* shorthand: CUSP 1-D array of PetscInt tagged cusp::host_memory */
28: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*); /* Prototypes only: the *_SeqCUSP routines through VecRestoreLocalVector_SeqCUSP are defined outside this header and each returns a PetscErrorCode. By name they presumably back the same-named Vec operations for the sequential CUSP vector type - confirm in the implementation file. This one takes two Vecs and two PetscScalar* (presumably outputs). */
29: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
30: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
31: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*); /* one Vec, a PetscInt, an array of Vecs and a PetscScalar*; presumably the PetscInt is the array length and the pointer receives one result per Vec - TODO confirm */
32: PETSC_INTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
33: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*); /* presumably the PetscInt is the common length of the scalar and Vec arrays - TODO confirm */
34: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
35: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
36: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*); /* takes a caller-supplied const PetscScalar array; ownership and memory space are not visible from this header */
37: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec);
38: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*); /* takes a caller-supplied const PetscScalar array; ownership and memory space are not visible from this header */
39: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*);
40: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*);
41: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
42: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
43: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
44: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
45: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
46: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*); /* second argument is a Vec*; presumably receives the new vector */
47: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*); /* result pointer is PetscReal*, not PetscScalar* */
48: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec); /* by name, presumably a host-to-device transfer of the vector data - TODO confirm */
49: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec); /* by name, presumably allocates the device-side storage if missing - TODO confirm */
50: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheckHost(Vec); /* by name, presumably the host-side counterpart of VecCUSPAllocateCheck - TODO confirm */
51: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec); /* the only PETSC_EXTERN declaration in this header; every other prototype here is PETSC_INTERN */
52: PETSC_INTERN PetscErrorCode VecView_Seq(Vec,PetscViewer); /* note: no CUSP suffix - NOTE(review): presumably the plain sequential viewer redeclared here for reuse; confirm */
53: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec);
54: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
55: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
56: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec);
57: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec);
59: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec); /* same signature as VecCUSPCopyToGPU(Vec) declared earlier in this header; how the two relate is not visible here - NOTE(review): presumably a wrapper, confirm. Declared PETSC_INTERN despite the _Public suffix. */
60: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec); /* same signature as VecCUSPAllocateCheck(Vec) declared earlier in this header; relationship not visible here - NOTE(review): confirm */
62: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
64: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0]) /* yields thrust::raw_pointer_cast applied to the address of element 0 of x; x is parenthesized before indexing. NOTE(review): indexes element 0, so presumably x is expected to be non-empty - confirm against callers */
66: #define WaitForGPU() (PetscCUSPSynchronize ? cudaDeviceSynchronize() : 0) /* Expression macro: when the flag PetscCUSPSynchronize (declared elsewhere) is true, blocks on cudaDeviceSynchronize() and yields its status; otherwise yields 0. Fully parenthesized so it composes safely inside larger expressions. cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize(). */
68: struct Vec_CUSP { /* GPU-related data kept for a CUSP vector; presumably attached to a Vec by the SeqCUSP implementation - where is not visible in this header */
69: CUSPARRAY *GPUarray; /* this always holds the GPU data; CUSPARRAY is cusp::array1d<PetscScalar,cusp::device_memory> */
70: cudaStream_t stream; /* A stream for doing asynchronous data transfers */
71: PetscBool hostDataRegisteredAsPageLocked; /* by name, presumably set once the host array has been registered as page-locked (pinned) memory - TODO confirm where it is set and cleared */
72: };
74: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*); /* prototype only; the last argument is a PetscCUSPIndices* (presumably receives the new handle). Presumably the two (PetscInt, PetscInt*) pairs are count + index array for the send and receive sides - TODO confirm order */
75: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*); /* prototype only; five PetscInt, two PetscInt* and an output-style PetscCUSPIndices*. NOTE(review): argument meaning is not visible here; presumably they map onto n, toFirst/fromFirst, toStep/fromStep and tslots/fslots of _p_VecScatterCUSPIndices_StoS - confirm */
76: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*); /* takes a pointer to the handle; presumably releases it - confirm whether the handle is also nulled */
77: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode); /* prototype only; takes two Vecs, an index handle, an InsertMode and a ScatterMode. Which Vec is source and which is destination is not visible here */
79: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType; /* type of the scatterType field of _p_PetscCUSPIndices; no explicit values are given, so STOS is 0 and PTOP is 1 */
80: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode; /* type of the fromMode and toMode fields of _p_VecScatterCUSPIndices_StoS; no explicit values are given, so GENERAL is 0 and STRIDED is 1 */
82: struct _p_VecScatterCUSPIndices_PtoP { /* four integers describing a PtoP scatter; presumably the payload behind _p_PetscCUSPIndices::scatter when the tag is VEC_SCATTER_CUSP_PTOP - confirm */
83: PetscInt ns; /* presumably the number of send indices - TODO confirm */
84: PetscInt sendLowestIndex; /* by name, presumably the smallest index on the send side - TODO confirm */
85: PetscInt nr; /* presumably the number of receive indices - TODO confirm */
86: PetscInt recvLowestIndex; /* by name, presumably the smallest index on the receive side - TODO confirm */
87: };
89: struct _p_VecScatterCUSPIndices_StoS { /* describes the "from" and "to" index sets of a StoS scatter plus some execution parameters; presumably the payload behind _p_PetscCUSPIndices::scatter when the tag is VEC_SCATTER_CUSP_STOS - confirm */
90: /* from indices data */
91: PetscInt *fslots; /* presumably the explicit list of source indices, used when fromMode is VEC_SCATTER_CUSP_GENERAL; owner and memory space are not visible here - TODO confirm */
92: PetscInt fromFirst; /* presumably the first source index when fromMode is VEC_SCATTER_CUSP_STRIDED - TODO confirm */
93: PetscInt fromStep; /* presumably the stride between source indices in strided mode - TODO confirm */
94: VecCUSPSequentialScatterMode fromMode; /* VEC_SCATTER_CUSP_GENERAL or VEC_SCATTER_CUSP_STRIDED */
96: /* to indices data */
97: PetscInt *tslots; /* destination-side counterpart of fslots; same caveats apply */
98: PetscInt toFirst; /* destination-side counterpart of fromFirst */
99: PetscInt toStep; /* destination-side counterpart of fromStep */
100: VecCUSPSequentialScatterMode toMode; /* VEC_SCATTER_CUSP_GENERAL or VEC_SCATTER_CUSP_STRIDED */
102: PetscInt n; /* presumably the number of entries moved by the scatter - TODO confirm */
103: PetscInt MAX_BLOCKS; /* by name, presumably an upper bound on CUDA blocks used for the scatter kernel - TODO confirm */
104: PetscInt MAX_CORESIDENT_THREADS; /* by name, presumably an upper bound on simultaneously resident GPU threads - TODO confirm */
105: cudaStream_t stream; /* CUDA stream handle; how it is created and used is not visible in this header */
106: };
108: struct _p_PetscCUSPIndices { /* untyped scatter description paired with a type tag; presumably PetscCUSPIndices (from an included header) is a pointer to this struct - confirm */
109: void * scatter; /* untyped pointer; presumably to a _p_VecScatterCUSPIndices_StoS or _p_VecScatterCUSPIndices_PtoP, selected by scatterType - TODO confirm */
110: VecCUSPScatterType scatterType; /* VEC_SCATTER_CUSP_STOS or VEC_SCATTER_CUSP_PTOP */
111: };
113: #endif