Actual source code: vecviennacl.cxx
petsc-3.14.3 2021-01-09
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
17: #ifdef VIENNACL_WITH_OPENCL
18: #include "viennacl/ocl/backend.hpp"
19: #endif
22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
23: {
28: *a = 0;
29: VecViennaCLCopyToGPU(v);
30: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31: ViennaCLWaitForGPU();
32: return(0);
33: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
36: {
41: v->offloadmask = PETSC_OFFLOAD_GPU;
43: PetscObjectStateIncrease((PetscObject)v);
44: return(0);
45: }
47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61: {
64: return(0);
65: }
67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68: {
73: *a = 0;
74: VecViennaCLAllocateCheck(v);
75: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76: ViennaCLWaitForGPU();
77: return(0);
78: }
80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81: {
86: v->offloadmask = PETSC_OFFLOAD_GPU;
88: PetscObjectStateIncrease((PetscObject)v);
89: return(0);
90: }
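/*
   Editor's note: a minimal usage sketch (not part of the original source) for the accessor
   pair above; assumes v is an existing VECSEQVIENNACL vector and error checking is omitted
   for brevity:

     ViennaCLVector *varray;
     VecViennaCLGetArray(v,&varray);
     *varray *= 2.0;                         // operate directly on the ViennaCL device vector
     VecViennaCLRestoreArray(v,&varray);     // marks the device copy as the valid one
*/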
94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95: {
96: PetscErrorCode ierr;
97: char string[20];
98: PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
101: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102: PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string,"cuda",&flg_cuda);
106: PetscStrcasecmp(string,"opencl",&flg_opencl);
107: PetscStrcasecmp(string,"openmp",&flg_openmp);
109: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118: } catch (std::exception const & ex) {
119: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120: }
121: }
123: #if defined(PETSC_HAVE_OPENCL)
124: /* ViennaCL OpenCL device type configuration */
125: PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
126: if (flg) {
127: try {
128: PetscStrcasecmp(string,"cpu",&flg);
129: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
131: PetscStrcasecmp(string,"gpu",&flg);
132: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
134: PetscStrcasecmp(string,"accelerator",&flg);
135: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136: } catch (std::exception const & ex) {
137: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138: }
139: }
140: #endif
142: /* Print available backends */
143: PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144: if (flg) {
145: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157: PetscPrintf(PETSC_COMM_WORLD, "\n");
159: /* Print selected backends */
160: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163: PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164: }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168: PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169: }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174: }
175: #else
176: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178: }
179: #endif
180: PetscPrintf(PETSC_COMM_WORLD, "\n");
181: }
182: return(0);
183: }
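/*
   Editor's note: an illustrative command line exercising the options parsed above
   (the executable name is a placeholder, not part of PETSc):

     ./app -viennacl_backend opencl -viennacl_opencl_device_type gpu -viennacl_view
*/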
185: /*
186: Allocates space for the vector array on the Host if it does not exist.
187: Does NOT change the PetscViennaCLFlag for the vector
188: Does NOT zero the ViennaCL array
189: */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193: PetscScalar *array;
194: Vec_Seq *s;
195: PetscInt n = v->map->n;
198: s = (Vec_Seq*)v->data;
199: VecViennaCLAllocateCheck(v);
200: if (s->array == 0) {
201: PetscMalloc1(n,&array);
202: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203: s->array = array;
204: s->array_allocated = array;
205: }
206: return(0);
207: }
210: /*
211: Allocates space for the vector array on the GPU if it does not exist.
212: Does NOT change the PetscViennaCLFlag for the vector
213: Does NOT zero the ViennaCL array
215: */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219: if (!v->spptr) {
220: try {
221: v->spptr = new Vec_ViennaCL;
222: ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223: ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
225: } catch(std::exception const & ex) {
226: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227: }
228: }
229: return(0);
230: }
233: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
234: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
235: {
240: VecViennaCLAllocateCheck(v);
241: if (v->map->n > 0) {
242: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
243: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
244: try {
245: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
246: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
247: ViennaCLWaitForGPU();
248: } catch(std::exception const & ex) {
249: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
250: }
251: PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
252: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
253: v->offloadmask = PETSC_OFFLOAD_BOTH;
254: }
255: }
256: return(0);
257: }
261: /*
262: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
263: */
264: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
265: {
270: VecViennaCLAllocateCheckHost(v);
271: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
272: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
273: try {
274: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
275: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
276: ViennaCLWaitForGPU();
277: } catch(std::exception const & ex) {
278: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
279: }
280: PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
281: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
282: v->offloadmask = PETSC_OFFLOAD_BOTH;
283: }
284: return(0);
285: }
288: /* Copy on CPU */
289: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
290: {
291: PetscScalar *ya;
292: const PetscScalar *xa;
293: PetscErrorCode ierr;
296: VecViennaCLAllocateCheckHost(xin);
297: VecViennaCLAllocateCheckHost(yin);
298: if (xin != yin) {
299: VecGetArrayRead(xin,&xa);
300: VecGetArray(yin,&ya);
301: PetscArraycpy(ya,xa,xin->map->n);
302: VecRestoreArrayRead(xin,&xa);
303: VecRestoreArray(yin,&ya);
304: }
305: return(0);
306: }
308: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
309: {
311: PetscInt n = xin->map->n,i;
312: PetscScalar *xx;
315: VecGetArray(xin,&xx);
316: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
317: VecRestoreArray(xin,&xx);
318: return(0);
319: }
321: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
322: {
323: Vec_Seq *vs = (Vec_Seq*)v->data;
327: PetscObjectSAWsViewOff(v);
328: #if defined(PETSC_USE_LOG)
329: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
330: #endif
331: if (vs->array_allocated) { PetscFree(vs->array_allocated); }
332: PetscFree(vs);
333: return(0);
334: }
336: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
337: {
338: Vec_Seq *v = (Vec_Seq*)vin->data;
341: v->array = v->unplacedarray;
342: v->unplacedarray = 0;
343: return(0);
344: }
347: /*MC
348: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
350: Options Database Keys:
351: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
353: Level: beginner
355: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
356: M*/
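/*
   Editor's note: a minimal sketch (not part of the original source) of selecting this vector
   type programmatically; alternatively call VecSetFromOptions() and pass -vec_type seqviennacl.
   Error checking is omitted for brevity and n denotes the desired length:

     Vec x;
     VecCreate(PETSC_COMM_SELF,&x);
     VecSetSizes(x,n,n);
     VecSetType(x,VECSEQVIENNACL);
     VecDestroy(&x);
*/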
359: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
360: {
361: const ViennaCLVector *xgpu;
362: ViennaCLVector *ygpu;
363: PetscErrorCode ierr;
366: VecViennaCLGetArrayRead(xin,&xgpu);
367: VecViennaCLGetArray(yin,&ygpu);
368: PetscLogGpuTimeBegin();
369: try {
370: if (alpha != 0.0 && xin->map->n > 0) {
371: *ygpu = *xgpu + alpha * *ygpu;
372: PetscLogGpuFlops(2.0*yin->map->n);
373: } else {
374: *ygpu = *xgpu;
375: }
376: ViennaCLWaitForGPU();
377: } catch(std::exception const & ex) {
378: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
379: }
380: PetscLogGpuTimeEnd();
381: VecViennaCLRestoreArrayRead(xin,&xgpu);
382: VecViennaCLRestoreArray(yin,&ygpu);
383: return(0);
384: }
387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389: const ViennaCLVector *xgpu;
390: ViennaCLVector *ygpu;
391: PetscErrorCode ierr;
394: if (alpha != 0.0 && xin->map->n > 0) {
395: VecViennaCLGetArrayRead(xin,&xgpu);
396: VecViennaCLGetArray(yin,&ygpu);
397: PetscLogGpuTimeBegin();
398: try {
399: *ygpu += alpha * *xgpu;
400: ViennaCLWaitForGPU();
401: } catch(std::exception const & ex) {
402: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
403: }
404: PetscLogGpuTimeEnd();
405: VecViennaCLRestoreArrayRead(xin,&xgpu);
406: VecViennaCLRestoreArray(yin,&ygpu);
407: PetscLogGpuFlops(2.0*yin->map->n);
408: }
409: return(0);
410: }
413: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
414: {
415: const ViennaCLVector *xgpu,*ygpu;
416: ViennaCLVector *wgpu;
417: PetscErrorCode ierr;
420: if (xin->map->n > 0) {
421: VecViennaCLGetArrayRead(xin,&xgpu);
422: VecViennaCLGetArrayRead(yin,&ygpu);
423: VecViennaCLGetArrayWrite(win,&wgpu);
424: PetscLogGpuTimeBegin();
425: try {
426: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
427: ViennaCLWaitForGPU();
428: } catch(std::exception const & ex) {
429: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430: }
431: PetscLogGpuTimeEnd();
432: PetscLogGpuFlops(win->map->n);
433: VecViennaCLRestoreArrayRead(xin,&xgpu);
434: VecViennaCLRestoreArrayRead(yin,&ygpu);
435: VecViennaCLRestoreArrayWrite(win,&wgpu);
436: }
437: return(0);
438: }
441: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
442: {
443: const ViennaCLVector *xgpu,*ygpu;
444: ViennaCLVector *wgpu;
445: PetscErrorCode ierr;
448: if (alpha == 0.0 && xin->map->n > 0) {
449: VecCopy_SeqViennaCL(yin,win);
450: } else {
451: VecViennaCLGetArrayRead(xin,&xgpu);
452: VecViennaCLGetArrayRead(yin,&ygpu);
453: VecViennaCLGetArrayWrite(win,&wgpu);
454: PetscLogGpuTimeBegin();
455: if (alpha == 1.0) {
456: try {
457: *wgpu = *ygpu + *xgpu;
458: } catch(std::exception const & ex) {
459: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
460: }
461: PetscLogGpuFlops(win->map->n);
462: } else if (alpha == -1.0) {
463: try {
464: *wgpu = *ygpu - *xgpu;
465: } catch(std::exception const & ex) {
466: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
467: }
468: PetscLogGpuFlops(win->map->n);
469: } else {
470: try {
471: *wgpu = *ygpu + alpha * *xgpu;
472: } catch(std::exception const & ex) {
473: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
474: }
475: PetscLogGpuFlops(2*win->map->n);
476: }
477: ViennaCLWaitForGPU();
478: PetscLogGpuTimeEnd();
479: VecViennaCLRestoreArrayRead(xin,&xgpu);
480: VecViennaCLRestoreArrayRead(yin,&ygpu);
481: VecViennaCLRestoreArrayWrite(win,&wgpu);
482: }
483: return(0);
484: }
487: /*
488: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
489: *
490: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
491: * so these two kernels are applied repeatedly, consuming one or two of the y_i at a time, until the final result is obtained.
492: */
493: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
494: {
496: PetscInt j;
499: for (j = 0; j < nv; ++j) {
500: if (j+1 < nv) {
501: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
502: ++j;
503: } else {
504: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
505: }
506: }
507: ViennaCLWaitForGPU();
508: return(0);
509: }
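/*
   Editor's note: as a concrete illustration of the loop above, nv = 3 results in
     VecAXPBYPCZ_SeqViennaCL(xin,alpha[0],alpha[1],1.0,y[0],y[1]);   (j = 0, consumes two vectors)
     VecAXPY_SeqViennaCL(xin,alpha[2],y[2]);                         (j = 2, the remaining vector)
*/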
512: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
513: {
514: const ViennaCLVector *xgpu,*ygpu;
515: PetscErrorCode ierr;
518: if (xin->map->n > 0) {
519: VecViennaCLGetArrayRead(xin,&xgpu);
520: VecViennaCLGetArrayRead(yin,&ygpu);
521: PetscLogGpuTimeBegin();
522: try {
523: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
524: } catch(std::exception const & ex) {
525: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
526: }
527: ViennaCLWaitForGPU();
528: PetscLogGpuTimeEnd();
529: if (xin->map->n >0) {
530: PetscLogGpuFlops(2.0*xin->map->n-1);
531: }
532: VecViennaCLRestoreArrayRead(xin,&xgpu);
533: VecViennaCLRestoreArrayRead(yin,&ygpu);
534: } else *z = 0.0;
535: return(0);
536: }
540: /*
541: * Operation z[j] = dot(x, y[j])
542: *
543: * The nv inner products are computed in a single pass over x using ViennaCL's vector_tuple interface (see below), avoiding both repeated reads of x and the extra memory a gemv()-based formulation would require.
544: */
545: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
546: {
547: PetscErrorCode ierr;
548: PetscInt n = xin->map->n,i;
549: const ViennaCLVector *xgpu,*ygpu;
550: Vec *yyin = (Vec*)yin;
551: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
554: if (xin->map->n > 0) {
555: VecViennaCLGetArrayRead(xin,&xgpu);
556: for (i=0; i<nv; i++) {
557: VecViennaCLGetArrayRead(yyin[i],&ygpu);
558: ygpu_array[i] = ygpu;
559: }
560: PetscLogGpuTimeBegin();
561: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
562: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
563: viennacl::copy(result.begin(), result.end(), z);
564: for (i=0; i<nv; i++) {
565: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
566: }
567: ViennaCLWaitForGPU();
568: PetscLogGpuTimeEnd();
569: VecViennaCLRestoreArrayRead(xin,&xgpu);
570: PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
571: } else {
572: for (i=0; i<nv; i++) z[i] = 0.0;
573: }
574: return(0);
575: }
577: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
578: {
582: /* Since the complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
583: VecMDot_SeqViennaCL(xin,nv,yin,z);
584: ViennaCLWaitForGPU();
585: return(0);
586: }
589: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
590: {
591: ViennaCLVector *xgpu;
595: if (xin->map->n > 0) {
596: VecViennaCLGetArrayWrite(xin,&xgpu);
597: PetscLogGpuTimeBegin();
598: try {
599: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
600: ViennaCLWaitForGPU();
601: } catch(std::exception const & ex) {
602: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
603: }
604: PetscLogGpuTimeEnd();
605: VecViennaCLRestoreArrayWrite(xin,&xgpu);
606: }
607: return(0);
608: }
610: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
611: {
612: ViennaCLVector *xgpu;
616: if (alpha == 0.0 && xin->map->n > 0) {
617: VecSet_SeqViennaCL(xin,alpha);
618: PetscLogGpuFlops(xin->map->n);
619: } else if (alpha != 1.0 && xin->map->n > 0) {
620: VecViennaCLGetArray(xin,&xgpu);
621: PetscLogGpuTimeBegin();
622: try {
623: *xgpu *= alpha;
624: ViennaCLWaitForGPU();
625: } catch(std::exception const & ex) {
626: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
627: }
628: PetscLogGpuTimeEnd();
629: VecViennaCLRestoreArray(xin,&xgpu);
630: PetscLogGpuFlops(xin->map->n);
631: }
632: return(0);
633: }
636: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
637: {
641: /* Since the complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
642: VecDot_SeqViennaCL(xin, yin, z);
643: ViennaCLWaitForGPU();
644: return(0);
645: }
648: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
649: {
650: const ViennaCLVector *xgpu;
651: ViennaCLVector *ygpu;
652: PetscErrorCode ierr;
655: if (xin != yin && xin->map->n > 0) {
656: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
657: VecViennaCLGetArrayRead(xin,&xgpu);
658: VecViennaCLGetArrayWrite(yin,&ygpu);
659: PetscLogGpuTimeBegin();
660: try {
661: *ygpu = *xgpu;
662: ViennaCLWaitForGPU();
663: } catch(std::exception const & ex) {
664: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
665: }
666: PetscLogGpuTimeEnd();
667: VecViennaCLRestoreArrayRead(xin,&xgpu);
668: VecViennaCLRestoreArrayWrite(yin,&ygpu);
670: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
671: /* copy on the CPU if we are on the CPU */
672: VecCopy_SeqViennaCL_Private(xin,yin);
673: ViennaCLWaitForGPU();
674: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
675: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
676: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
677: /* copy in CPU */
678: VecCopy_SeqViennaCL_Private(xin,yin);
679: ViennaCLWaitForGPU();
680: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
681: /* copy in GPU */
682: VecViennaCLGetArrayRead(xin,&xgpu);
683: VecViennaCLGetArrayWrite(yin,&ygpu);
684: PetscLogGpuTimeBegin();
685: try {
686: *ygpu = *xgpu;
687: ViennaCLWaitForGPU();
688: } catch(std::exception const & ex) {
689: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690: }
691: PetscLogGpuTimeEnd();
692: VecViennaCLRestoreArrayRead(xin,&xgpu);
693: VecViennaCLRestoreArrayWrite(yin,&ygpu);
694: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
695: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
696: default to copying on the GPU (this is an arbitrary choice) */
697: VecViennaCLGetArrayRead(xin,&xgpu);
698: VecViennaCLGetArrayWrite(yin,&ygpu);
699: PetscLogGpuTimeBegin();
700: try {
701: *ygpu = *xgpu;
702: ViennaCLWaitForGPU();
703: } catch(std::exception const & ex) {
704: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
705: }
706: PetscLogGpuTimeEnd();
707: VecViennaCLRestoreArrayRead(xin,&xgpu);
708: VecViennaCLRestoreArrayWrite(yin,&ygpu);
709: } else {
710: VecCopy_SeqViennaCL_Private(xin,yin);
711: ViennaCLWaitForGPU();
712: }
713: }
714: }
715: return(0);
716: }
719: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
720: {
722: ViennaCLVector *xgpu,*ygpu;
725: if (xin != yin && xin->map->n > 0) {
726: VecViennaCLGetArray(xin,&xgpu);
727: VecViennaCLGetArray(yin,&ygpu);
728: PetscLogGpuTimeBegin();
729: try {
730: viennacl::swap(*xgpu, *ygpu);
731: ViennaCLWaitForGPU();
732: } catch(std::exception const & ex) {
733: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
734: }
735: PetscLogGpuTimeEnd();
736: VecViennaCLRestoreArray(xin,&xgpu);
737: VecViennaCLRestoreArray(yin,&ygpu);
738: }
739: return(0);
740: }
743: // y = alpha * x + beta * y
744: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
745: {
746: PetscErrorCode ierr;
747: PetscScalar a = alpha,b = beta;
748: const ViennaCLVector *xgpu;
749: ViennaCLVector *ygpu;
752: if (a == 0.0 && xin->map->n > 0) {
753: VecScale_SeqViennaCL(yin,beta);
754: } else if (b == 1.0 && xin->map->n > 0) {
755: VecAXPY_SeqViennaCL(yin,alpha,xin);
756: } else if (a == 1.0 && xin->map->n > 0) {
757: VecAYPX_SeqViennaCL(yin,beta,xin);
758: } else if (b == 0.0 && xin->map->n > 0) {
759: VecViennaCLGetArrayRead(xin,&xgpu);
760: VecViennaCLGetArray(yin,&ygpu);
761: PetscLogGpuTimeBegin();
762: try {
763: *ygpu = *xgpu * alpha;
764: ViennaCLWaitForGPU();
765: } catch(std::exception const & ex) {
766: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
767: }
768: PetscLogGpuTimeEnd();
769: PetscLogGpuFlops(xin->map->n);
770: VecViennaCLRestoreArrayRead(xin,&xgpu);
771: VecViennaCLRestoreArray(yin,&ygpu);
772: } else if (xin->map->n > 0) {
773: VecViennaCLGetArrayRead(xin,&xgpu);
774: VecViennaCLGetArray(yin,&ygpu);
775: PetscLogGpuTimeBegin();
776: try {
777: *ygpu = *xgpu * alpha + *ygpu * beta;
778: ViennaCLWaitForGPU();
779: } catch(std::exception const & ex) {
780: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
781: }
782: PetscLogGpuTimeEnd();
783: VecViennaCLRestoreArrayRead(xin,&xgpu);
784: VecViennaCLRestoreArray(yin,&ygpu);
785: PetscLogGpuFlops(3.0*xin->map->n);
786: }
787: return(0);
788: }
791: /* Operation z = alpha * x + beta * y + gamma * z */
792: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
793: {
794: PetscErrorCode ierr;
795: PetscInt n = zin->map->n;
796: const ViennaCLVector *xgpu,*ygpu;
797: ViennaCLVector *zgpu;
800: VecViennaCLGetArrayRead(xin,&xgpu);
801: VecViennaCLGetArrayRead(yin,&ygpu);
802: VecViennaCLGetArray(zin,&zgpu);
803: if (alpha == 0.0 && xin->map->n > 0) {
804: PetscLogGpuTimeBegin();
805: try {
806: if (beta == 0.0) {
807: *zgpu = gamma * *zgpu;
808: ViennaCLWaitForGPU();
809: PetscLogGpuFlops(1.0*n);
810: } else if (gamma == 0.0) {
811: *zgpu = beta * *ygpu;
812: ViennaCLWaitForGPU();
813: PetscLogGpuFlops(1.0*n);
814: } else {
815: *zgpu = beta * *ygpu + gamma * *zgpu;
816: ViennaCLWaitForGPU();
817: PetscLogGpuFlops(3.0*n);
818: }
819: } catch(std::exception const & ex) {
820: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821: }
822: PetscLogGpuTimeEnd();
824: } else if (beta == 0.0 && xin->map->n > 0) {
825: PetscLogGpuTimeBegin();
826: try {
827: if (gamma == 0.0) {
828: *zgpu = alpha * *xgpu;
829: ViennaCLWaitForGPU();
830: PetscLogGpuFlops(1.0*n);
831: } else {
832: *zgpu = alpha * *xgpu + gamma * *zgpu;
833: ViennaCLWaitForGPU();
834: PetscLogGpuFlops(3.0*n);
835: }
836: } catch(std::exception const & ex) {
837: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
838: }
839: PetscLogGpuTimeEnd();
840: } else if (gamma == 0.0 && xin->map->n > 0) {
841: PetscLogGpuTimeBegin();
842: try {
843: *zgpu = alpha * *xgpu + beta * *ygpu;
844: ViennaCLWaitForGPU();
845: } catch(std::exception const & ex) {
846: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
847: }
848: PetscLogGpuTimeEnd();
849: PetscLogGpuFlops(3.0*n);
850: } else if (xin->map->n > 0) {
851: PetscLogGpuTimeBegin();
852: try {
853: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
854: if (gamma != 1.0)
855: *zgpu *= gamma;
856: *zgpu += alpha * *xgpu + beta * *ygpu;
857: ViennaCLWaitForGPU();
858: } catch(std::exception const & ex) {
859: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
860: }
861: PetscLogGpuTimeEnd();
862: PetscLogGpuFlops(5.0*n);
863: } /* restore unconditionally: the device arrays were acquired before the branch above */
864: VecViennaCLRestoreArray(zin,&zgpu);
865: VecViennaCLRestoreArrayRead(xin,&xgpu);
866: VecViennaCLRestoreArrayRead(yin,&ygpu);
867: return(0);
868: }
870: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
871: {
872: PetscErrorCode ierr;
873: PetscInt n = win->map->n;
874: const ViennaCLVector *xgpu,*ygpu;
875: ViennaCLVector *wgpu;
878: if (xin->map->n > 0) {
879: VecViennaCLGetArrayRead(xin,&xgpu);
880: VecViennaCLGetArrayRead(yin,&ygpu);
881: VecViennaCLGetArray(win,&wgpu);
882: PetscLogGpuTimeBegin();
883: try {
884: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
885: ViennaCLWaitForGPU();
886: } catch(std::exception const & ex) {
887: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888: }
889: PetscLogGpuTimeEnd();
890: VecViennaCLRestoreArrayRead(xin,&xgpu);
891: VecViennaCLRestoreArrayRead(yin,&ygpu);
892: VecViennaCLRestoreArray(win,&wgpu);
893: PetscLogGpuFlops(n);
894: }
895: return(0);
896: }
899: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
900: {
901: PetscErrorCode ierr;
902: PetscInt n = xin->map->n;
903: PetscBLASInt bn;
904: const ViennaCLVector *xgpu;
907: if (xin->map->n > 0) {
908: PetscBLASIntCast(n,&bn);
909: VecViennaCLGetArrayRead(xin,&xgpu);
910: if (type == NORM_2 || type == NORM_FROBENIUS) {
911: PetscLogGpuTimeBegin();
912: try {
913: *z = viennacl::linalg::norm_2(*xgpu);
914: ViennaCLWaitForGPU();
915: } catch(std::exception const & ex) {
916: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
917: }
918: PetscLogGpuTimeEnd();
919: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
920: } else if (type == NORM_INFINITY) {
921: PetscLogGpuTimeBegin();
922: try {
923: *z = viennacl::linalg::norm_inf(*xgpu);
924: ViennaCLWaitForGPU();
925: } catch(std::exception const & ex) {
926: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927: }
928: PetscLogGpuTimeEnd();
929: } else if (type == NORM_1) {
930: PetscLogGpuTimeBegin();
931: try {
932: *z = viennacl::linalg::norm_1(*xgpu);
933: ViennaCLWaitForGPU();
934: } catch(std::exception const & ex) {
935: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
936: }
937: PetscLogGpuTimeEnd();
938: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
939: } else if (type == NORM_1_AND_2) {
940: PetscLogGpuTimeBegin();
941: try {
942: *z = viennacl::linalg::norm_1(*xgpu);
943: *(z+1) = viennacl::linalg::norm_2(*xgpu);
944: ViennaCLWaitForGPU();
945: } catch(std::exception const & ex) {
946: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
947: }
948: PetscLogGpuTimeEnd();
949: PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
950: PetscLogGpuFlops(PetscMax(n-1.0,0.0));
951: }
952: VecViennaCLRestoreArrayRead(xin,&xgpu);
953: } else if (type == NORM_1_AND_2) {
954: *z = 0.0;
955: *(z+1) = 0.0;
956: } else *z = 0.0;
957: return(0);
958: }
961: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
962: {
966: VecSetRandom_SeqViennaCL_Private(xin,r);
967: xin->offloadmask = PETSC_OFFLOAD_CPU;
968: return(0);
969: }
971: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
972: {
977: VecViennaCLCopyFromGPU(vin);
978: VecResetArray_SeqViennaCL_Private(vin);
979: vin->offloadmask = PETSC_OFFLOAD_CPU;
980: return(0);
981: }
983: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
984: {
989: VecViennaCLCopyFromGPU(vin);
990: VecPlaceArray_Seq(vin,a);
991: vin->offloadmask = PETSC_OFFLOAD_CPU;
992: return(0);
993: }
995: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
996: {
1001: VecViennaCLCopyFromGPU(vin);
1002: VecReplaceArray_Seq(vin,a);
1003: vin->offloadmask = PETSC_OFFLOAD_CPU;
1004: return(0);
1005: }
1008: /*@C
1009: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1011: Collective
1013: Input Parameters:
1014: + comm - the communicator, should be PETSC_COMM_SELF
1015: - n - the vector length
1017: Output Parameter:
1018: . V - the vector
1020: Notes:
1021: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1022: same type as an existing vector.
1024: Level: intermediate
1026: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1027: @*/
1028: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1029: {
1033: VecCreate(comm,v);
1034: VecSetSizes(*v,n,n);
1035: VecSetType(*v,VECSEQVIENNACL);
1036: return(0);
1037: }
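/*
   Editor's note: a minimal usage sketch (not part of the original source); error checking
   omitted for brevity. VecDuplicate() yields another VECSEQVIENNACL vector, as the Notes state:

     Vec x,y;
     VecCreateSeqViennaCL(PETSC_COMM_SELF,100,&x);
     VecSet(x,1.0);
     VecDuplicate(x,&y);
     VecDestroy(&x);
     VecDestroy(&y);
*/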
1040: /*@C
1041: VecCreateSeqViennaCLWithArray - Creates a ViennaCL sequential array-style vector,
1042: where the user provides the array space to store the vector values.
1044: Collective
1046: Input Parameters:
1047: + comm - the communicator, should be PETSC_COMM_SELF
1048: . bs - the block size
1049: . n - the vector length
1050: - array - viennacl array where the vector elements are to be stored.
1052: Output Parameter:
1053: . V - the vector
1055: Notes:
1056: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1057: same type as an existing vector.
1059: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1060: at a later stage to SET the array for storing the vector values.
1062: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1063: The user should not free the array until the vector is destroyed.
1065: Level: intermediate
1067: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1068: VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1069: VecCreateMPIWithArray()
1070: @*/
1071: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1072: {
1074: PetscMPIInt size;
1077: VecCreate(comm,V);
1078: VecSetSizes(*V,n,n);
1079: VecSetBlockSize(*V,bs);
1080: MPI_Comm_size(comm,&size);
1081: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1082: VecCreate_SeqViennaCL_Private(*V,array);
1083: return(0);
1084: }
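/*
   Editor's note: a minimal sketch (not part of the original source), assuming ViennaCLVector
   is the viennacl::vector<PetscScalar> used throughout this file; error checking omitted:

     ViennaCLVector gpuvec(100);                                  // user-managed device storage
     Vec            x;
     VecCreateSeqViennaCLWithArray(PETSC_COMM_SELF,1,100,&gpuvec,&x);
     ...                                                          // use x
     VecDestroy(&x);                                              // gpuvec is NOT freed here (see Notes)
*/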
1086: /*@C
1087: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1088: the user provides the array space to store the vector values.
1090: Collective
1092: Input Parameters:
1093: + comm - the communicator, should be PETSC_COMM_SELF
1094: . bs - the block size
1095: . n - the vector length
1096: . cpuarray - CPU memory where the vector elements are to be stored.
1097: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1099: Output Parameter:
1100: . V - the vector
1102: Notes:
1103: If both cpuarray and viennaclvec are provided, the caller must ensure that
1104: the provided arrays have identical values.
1106: PETSc does NOT free the provided arrays when the vector is destroyed via
1107: VecDestroy(). The user should not free the array until the vector is
1108: destroyed.
1110: Level: intermediate
1112: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1113: VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1114: VecViennaCLAllocateCheckHost()
1115: @*/
1116: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1117: {
1119: PetscMPIInt size;
1123: MPI_Comm_size(comm,&size);
1124: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1126: // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1127: VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);
1129: if (cpuarray && viennaclvec) {
1130: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1131: s->array = (PetscScalar*)cpuarray;
1132: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1133: } else if (cpuarray) {
1134: Vec_Seq *s = (Vec_Seq*)((*V)->data);
1135: s->array = (PetscScalar*)cpuarray;
1136: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1137: } else if (viennaclvec) {
1138: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1139: } else {
1140: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1141: }
1143: return(0);
1144: }
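/*
   Editor's note: a minimal sketch (not part of the original source) providing only host storage,
   so the new vector starts with offloadmask == PETSC_OFFLOAD_CPU; error checking omitted:

     PetscScalar host[4] = {1.0,2.0,3.0,4.0};
     Vec         x;
     VecCreateSeqViennaCLWithArrays(PETSC_COMM_SELF,1,4,host,NULL,&x);
     ...
     VecDestroy(&x);                         // host[] is not freed by PETSc (see Notes)
*/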
1146: /*@C
1147: VecViennaCLPlaceArray - Replaces the ViennaCL vector in a Vec with
1148: the one provided by the user. This is useful to avoid a copy.
1150: Not Collective
1152: Input Parameters:
1153: + vec - the vector
1154: - array - the ViennaCL vector
1156: Notes:
1157: You can return to the original ViennaCL vector with a call to
1158: VecViennaCLResetArray(). It is not possible to use VecViennaCLPlaceArray()
1159: and VecPlaceArray() at the same time on the same vector.
1161: Level: intermediate
1163: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1164: VecCUDAPlaceArray()
1166: @*/
1167: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1168: {
1173: VecViennaCLCopyToGPU(vin);
1174: if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1175: ((Vec_Seq*)vin->data)->unplacedarray = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1176: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1177: vin->offloadmask = PETSC_OFFLOAD_GPU;
1178: PetscObjectStateIncrease((PetscObject)vin);
1179: return(0);
1180: }
1182: /*@C
1183: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1184: after the use of VecViennaCLPlaceArray().
1186: Not Collective
1188: Input Parameters:
1189: . vec - the vector
1191: Level: developer
1193: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1194: @*/
1195: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1196: {
1201: VecViennaCLCopyToGPU(vin);
1202: ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1203: ((Vec_Seq*)vin->data)->unplacedarray = 0;
1204: vin->offloadmask = PETSC_OFFLOAD_GPU;
1205: PetscObjectStateIncrease((PetscObject)vin);
1206: return(0);
1207: }
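/*
   Editor's note: a minimal sketch (not part of the original source) of the place/reset pairing
   described above; assumes x is an existing VECSEQVIENNACL vector and "replacement" is a
   user-managed ViennaCLVector of the same length. Error checking omitted:

     VecViennaCLPlaceArray(x,&replacement);  // x now uses the user's device vector
     ...                                     // operate on x
     VecViennaCLResetArray(x);               // return x to its original device storage
*/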
1210: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1211: *
1212: * Simply reuses VecDot() and VecNorm(). A performance improvement through a custom kernel (via the kernel generator) would be possible.
1213: */
1214: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1215: {
1216: PetscErrorCode ierr;
1219: VecDot_SeqViennaCL(s,t,dp);
1220: VecNorm_SeqViennaCL(t,NORM_2,nm);
1221: *nm *= *nm; //squared norm required
1222: return(0);
1223: }
1225: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1226: {
1230: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1231: PetscLayoutReference(win->map,&(*V)->map);
1232: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1233: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1234: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1235: return(0);
1236: }
1238: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1239: {
1243: try {
1244: if (v->spptr) {
1245: delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1246: delete (Vec_ViennaCL*) v->spptr;
1247: }
1248: } catch(std::exception const & ex) { /* match the exception type caught elsewhere in this file */
1249: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1250: }
1251: VecDestroy_SeqViennaCL_Private(v);
1252: return(0);
1253: }
1255: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1256: {
1260: V->boundtocpu = flg;
1261: if (flg) {
1262: VecViennaCLCopyFromGPU(V);
1263: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1264: V->ops->dot = VecDot_Seq;
1265: V->ops->norm = VecNorm_Seq;
1266: V->ops->tdot = VecTDot_Seq;
1267: V->ops->scale = VecScale_Seq;
1268: V->ops->copy = VecCopy_Seq;
1269: V->ops->set = VecSet_Seq;
1270: V->ops->swap = VecSwap_Seq;
1271: V->ops->axpy = VecAXPY_Seq;
1272: V->ops->axpby = VecAXPBY_Seq;
1273: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1274: V->ops->pointwisemult = VecPointwiseMult_Seq;
1275: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1276: V->ops->setrandom = VecSetRandom_Seq;
1277: V->ops->dot_local = VecDot_Seq;
1278: V->ops->tdot_local = VecTDot_Seq;
1279: V->ops->norm_local = VecNorm_Seq;
1280: V->ops->mdot_local = VecMDot_Seq;
1281: V->ops->mtdot_local = VecMTDot_Seq;
1282: V->ops->maxpy = VecMAXPY_Seq;
1283: V->ops->mdot = VecMDot_Seq;
1284: V->ops->mtdot = VecMTDot_Seq;
1285: V->ops->aypx = VecAYPX_Seq;
1286: V->ops->waxpy = VecWAXPY_Seq;
1287: V->ops->dotnorm2 = NULL;
1288: V->ops->placearray = VecPlaceArray_Seq;
1289: V->ops->replacearray = VecReplaceArray_Seq;
1290: V->ops->resetarray = VecResetArray_Seq;
1291: V->ops->duplicate = VecDuplicate_Seq;
1292: } else {
1293: V->ops->dot = VecDot_SeqViennaCL;
1294: V->ops->norm = VecNorm_SeqViennaCL;
1295: V->ops->tdot = VecTDot_SeqViennaCL;
1296: V->ops->scale = VecScale_SeqViennaCL;
1297: V->ops->copy = VecCopy_SeqViennaCL;
1298: V->ops->set = VecSet_SeqViennaCL;
1299: V->ops->swap = VecSwap_SeqViennaCL;
1300: V->ops->axpy = VecAXPY_SeqViennaCL;
1301: V->ops->axpby = VecAXPBY_SeqViennaCL;
1302: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1303: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1304: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1305: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1306: V->ops->dot_local = VecDot_SeqViennaCL;
1307: V->ops->tdot_local = VecTDot_SeqViennaCL;
1308: V->ops->norm_local = VecNorm_SeqViennaCL;
1309: V->ops->mdot_local = VecMDot_SeqViennaCL;
1310: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1311: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1312: V->ops->mdot = VecMDot_SeqViennaCL;
1313: V->ops->mtdot = VecMTDot_SeqViennaCL;
1314: V->ops->aypx = VecAYPX_SeqViennaCL;
1315: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1316: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1317: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1318: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1319: V->ops->resetarray = VecResetArray_SeqViennaCL;
1320: V->ops->destroy = VecDestroy_SeqViennaCL;
1321: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1322: }
1323: return(0);
1324: }
1326: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1327: {
1329: PetscMPIInt size;
1332: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1333: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1334: VecCreate_Seq_Private(V,0);
1335: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1337: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1338: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1340: VecViennaCLAllocateCheck(V);
1341: VecViennaCLAllocateCheckHost(V);
1342: VecSet(V,0.0);
1343: VecSet_Seq(V,0.0);
1344: V->offloadmask = PETSC_OFFLOAD_BOTH;
1345: return(0);
1346: }
1348: /*@C
1349: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1351: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1352: invoking clReleaseContext().
1355: Input Parameters:
1356: . v - the vector
1358: Output Parameters:
1359: . ctx - pointer to the underlying CL context
1361: Level: intermediate
1363: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1364: @*/
1365: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1366: {
1367: #if !defined(PETSC_HAVE_OPENCL)
1368: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1369: #else
1375: const ViennaCLVector *v_vcl;
1376: VecViennaCLGetArrayRead(v, &v_vcl);
1377: try{
1378: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1379: const cl_context ocl_ctx = vcl_ctx.handle().get();
1380: clRetainContext(ocl_ctx);
1381: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1382: } catch (std::exception const & ex) {
1383: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1384: }
1386: return(0);
1387: #endif
1388: }
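/*
   Editor's note: a minimal usage sketch (not part of the original source); the cast and the
   matching clReleaseContext() follow the caller responsibilities stated above. Error checking omitted:

     PETSC_UINTPTR_T ctxhandle;
     VecViennaCLGetCLContext(v,&ctxhandle);
     cl_context ctx = (cl_context)ctxhandle;
     ...                                     // use ctx, e.g. to create additional OpenCL resources
     clReleaseContext(ctx);                  // balances the clRetainContext() performed above
*/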
1390: /*@C
1391: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1392: operations of the Vec are enqueued.
1394: Caller should cast (*queue) to (const cl_command_queue). Caller is
1395: responsible for invoking clReleaseCommandQueue().
1397: Input Parameters:
1398: . v - the vector
1400: Output Parameters:
1401: . queue - pointer to the CL command queue
1403: Level: intermediate
1405: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1406: @*/
1407: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1408: {
1409: #if !defined(PETSC_HAVE_OPENCL)
1410: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1411: #else
1416: const ViennaCLVector *v_vcl;
1417: VecViennaCLGetArrayRead(v, &v_vcl);
1418: try{
1419: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1420: const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1421: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1422: clRetainCommandQueue(ocl_queue);
1423: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1424: } catch (std::exception const & ex) {
1425: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1426: }
1428: return(0);
1429: #endif
1430: }
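/*
   Editor's note: the analogous sketch (not part of the original source) for the command queue;
   error checking omitted:

     PETSC_UINTPTR_T qhandle;
     VecViennaCLGetCLQueue(v,&qhandle);
     cl_command_queue queue = (cl_command_queue)qhandle;
     ...                                     // enqueue custom kernels on the vector's queue
     clReleaseCommandQueue(queue);           // balances the clRetainCommandQueue() performed above
*/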
1432: /*@C
1433: VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1435: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1436: invoking clReleaseMemObject().
1438: Input Parameters:
1439: . v - the vector
1441: Output Parameters:
1442: . mem - pointer to the device buffer
1444: Level: intermediate
1446: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1447: @*/
1448: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1449: {
1450: #if !defined(PETSC_HAVE_OPENCL)
1451: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1452: #else
1457: const ViennaCLVector *v_vcl;
1458: VecViennaCLGetArrayRead(v, &v_vcl);
1459: try{
1460: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1461: clRetainMemObject(ocl_mem);
1462: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1463: } catch (std::exception const & ex) {
1464: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1465: }
1466: return(0);
1467: #endif
1468: }
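/*
   Editor's note: a minimal sketch (not part of the original source) for read-only access to the
   device buffer; error checking omitted:

     PETSC_UINTPTR_T memhandle;
     VecViennaCLGetCLMemRead(v,&memhandle);
     cl_mem buf = (cl_mem)memhandle;
     ...                                     // pass buf to a custom kernel that only reads it
     clReleaseMemObject(buf);                // balances the clRetainMemObject() performed above
*/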
1470: /*@C
1471: VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1473: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1474: invoking clReleaseMemObject().
1476: The device pointer has to be released by calling
1477: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1478: the host will be marked as out of date. A subsequent access of the host data
1479: will thus incur a data transfer from the device to the host.
1481: Input Parameters:
1482: . v - the vector
1484: Output Parameters:
1485: . mem - pointer to the device buffer
1487: Level: intermediate
1489: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1490: @*/
1491: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1492: {
1493: #if !defined(PETSC_HAVE_OPENCL)
1494: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1495: #else
1500: ViennaCLVector *v_vcl;
1501: VecViennaCLGetArrayWrite(v, &v_vcl);
1502: try{
1503: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1504: clRetainMemObject(ocl_mem);
1505: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1506: } catch (std::exception const & ex) {
1507: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1508: }
1510: return(0);
1511: #endif
1512: }
1514: /*@C
1515: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1516: acquired with VecViennaCLGetCLMemWrite().
1518: This marks the host data as out of date. Subsequent access to the
1519: vector data on the host side with, for instance, VecGetArray() incurs a
1520: data transfer.
1522: Input Parameters:
1523: . v - the vector
1525: Level: intermediate
1527: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1528: @*/
1529: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1530: {
1531: #if !defined(PETSC_HAVE_OPENCL)
1532: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1533: #else
1538: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1540: return(0);
1541: #endif
1542: }
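/*
   Editor's note: a minimal sketch (not part of the original source) of the get/restore pairing
   described above; restoring marks the host copy as out of date. Error checking omitted:

     PETSC_UINTPTR_T memhandle;
     VecViennaCLGetCLMemWrite(v,&memhandle);
     cl_mem buf = (cl_mem)memhandle;
     ...                                     // pass buf to a custom kernel that writes into it
     clReleaseMemObject(buf);
     VecViennaCLRestoreCLMemWrite(v);
*/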
1545: /*@C
1546: VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1548: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1549: invoking clReleaseMemObject().
1551: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1552: Upon restoring the vector data the data on the host will be marked as out of
1553: date. A subsequent access of the host data will thus incur a data transfer
1554: from the device to the host.
1556: Input Parameters:
1557: . v - the vector
1559: Output Parameters:
1560: . mem - pointer to the device buffer
1562: Level: intermediate
1564: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1565: @*/
1566: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1567: {
1568: #if !defined(PETSC_HAVE_OPENCL)
1569: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1570: #else
1575: ViennaCLVector *v_vcl;
1576: VecViennaCLGetArray(v, &v_vcl);
1577: try{
1578: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1579: clRetainMemObject(ocl_mem);
1580: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1581: } catch (std::exception const & ex) {
1582: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1583: }
1585: return(0);
1586: #endif
1587: }
1589: /*@C
1590: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1591: acquired with VecViennaCLGetCLMem().
1593: This marks the host data as out of date. Subsequent access to the vector
1594: data on the host side with, for instance, VecGetArray() incurs a data
1595: transfer.
1597: Input Parameters:
1598: . v - the vector
1600: Level: intermediate
1602: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1603: @*/
1604: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1605: {
1606: #if !defined(PETSC_HAVE_OPENCL)
1607: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1608: #else
1613: VecViennaCLRestoreArray(v, PETSC_NULL);
1615: return(0);
1616: #endif
1617: }
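/*
   Editor's note: the read-write variant of the previous sketch (not part of the original source);
   error checking omitted:

     PETSC_UINTPTR_T memhandle;
     VecViennaCLGetCLMem(v,&memhandle);
     cl_mem buf = (cl_mem)memhandle;
     ...                                     // the custom kernel may both read and write buf
     clReleaseMemObject(buf);
     VecViennaCLRestoreCLMem(v);
*/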
1619: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1620: {
1622: Vec_ViennaCL *vecviennacl;
1623: PetscMPIInt size;
1626: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1627: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1628: VecCreate_Seq_Private(V,0);
1629: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1630: VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1631: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1633: if (array) {
1634: if (!V->spptr)
1635: V->spptr = new Vec_ViennaCL;
1636: vecviennacl = (Vec_ViennaCL*)V->spptr;
1637: vecviennacl->GPUarray_allocated = 0;
1638: vecviennacl->GPUarray = (ViennaCLVector*)array;
1639: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1640: }
1642: return(0);
1643: }