Actual source code: vecviennacl.cxx
petsc-3.6.4 2016-04-12
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include <vector>
12: #include "viennacl/linalg/inner_prod.hpp"
13: #include "viennacl/linalg/norm_1.hpp"
14: #include "viennacl/linalg/norm_2.hpp"
15: #include "viennacl/linalg/norm_inf.hpp"
16: #include "viennacl/ocl/backend.hpp"
21: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
22: {
26: *a = 0;
27: VecViennaCLCopyToGPU(v);
28: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
29: ViennaCLWaitForGPU();
30: return(0);
31: }
35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
36: {
40: v->valid_GPU_array = PETSC_VIENNACL_GPU;
42: PetscObjectStateIncrease((PetscObject)v);
43: return(0);
44: }
48: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
49: {
53: *a = 0;
54: VecViennaCLCopyToGPU(v);
55: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56: ViennaCLWaitForGPU();
57: return(0);
58: }
62: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
63: {
65: return(0);
66: }
70: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
71: {
75: *a = 0;
76: VecViennaCLAllocateCheck(v);
77: *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
78: ViennaCLWaitForGPU();
79: return(0);
80: }
84: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
85: {
89: v->valid_GPU_array = PETSC_VIENNACL_GPU;
91: PetscObjectStateIncrease((PetscObject)v);
92: return(0);
93: }
99: PETSC_EXTERN PetscErrorCode PetscObjectViennaCLSetFromOptions(PetscObject obj)
100: {
101: PetscErrorCode ierr;
102: PetscBool flg;
105: PetscObjectOptionsBegin(obj);
107: PetscOptionsHasName(NULL,"-viennacl_device_cpu",&flg);
108: if (flg) {
109: try {
110: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
111: } catch (std::exception const & ex) {
112: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
113: }
114: }
115: PetscOptionsHasName(NULL,"-viennacl_device_gpu",&flg);
116: if (flg) {
117: try {
118: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
119: } catch (std::exception const & ex) {
120: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
121: }
122: }
123: PetscOptionsHasName(NULL,"-viennacl_device_accelerator",&flg);
124: if (flg) {
125: try {
126: viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
127: } catch (std::exception const & ex) {
128: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
129: }
130: }
132: PetscOptionsEnd();
133: return(0);
134: }
138: /*
139: Allocates space for the vector array on the Host if it does not exist.
140: Does NOT change the PetscViennaCLFlag for the vector
141: Does NOT zero the ViennaCL array
142: */
143: PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
144: {
146: PetscScalar *array;
147: Vec_Seq *s;
148: PetscInt n = v->map->n;
151: s = (Vec_Seq*)v->data;
152: VecViennaCLAllocateCheck(v);
153: if (s->array == 0) {
154: PetscMalloc1(n,&array);
155: PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
156: s->array = array;
157: s->array_allocated = array;
158: }
159: return(0);
160: }
165: /*
166: Allocates space for the vector array on the GPU if it does not exist.
167: Does NOT change the PetscViennaCLFlag for the vector
168: Does NOT zero the ViennaCL array
170: */
171: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
172: {
174: int rank;
177: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
178: // First allocate memory on the GPU if needed
179: if (!v->spptr) {
180: try {
181: PetscObjectViennaCLSetFromOptions((PetscObject)v);
182: v->spptr = new Vec_ViennaCL;
183: ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
185: } catch(std::exception const & ex) {
186: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
187: }
188: }
189: return(0);
190: }
195: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
196: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
197: {
201: VecViennaCLAllocateCheck(v);
202: if (v->map->n > 0) {
203: if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
204: PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
205: try {
206: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
207: viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
208: ViennaCLWaitForGPU();
209: } catch(std::exception const & ex) {
210: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
211: }
212: PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
213: v->valid_GPU_array = PETSC_VIENNACL_BOTH;
214: }
215: }
216: return(0);
217: }
223: /*
224: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
225: */
226: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
227: {
231: VecViennaCLAllocateCheckHost(v);
232: if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
233: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
234: try {
235: ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
236: viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
237: ViennaCLWaitForGPU();
238: } catch(std::exception const & ex) {
239: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
240: }
241: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
242: v->valid_GPU_array = PETSC_VIENNACL_BOTH;
243: }
244: return(0);
245: }
248: /* Copy on CPU */
251: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
252: {
253: PetscScalar *ya;
254: const PetscScalar *xa;
255: PetscErrorCode ierr;
258: VecViennaCLAllocateCheckHost(xin);
259: VecViennaCLAllocateCheckHost(yin);
260: if (xin != yin) {
261: VecGetArrayRead(xin,&xa);
262: VecGetArray(yin,&ya);
263: PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
264: VecRestoreArrayRead(xin,&xa);
265: VecRestoreArray(yin,&ya);
266: }
267: return(0);
268: }
272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
273: {
275: PetscInt n = xin->map->n,i;
276: PetscScalar *xx;
279: VecGetArray(xin,&xx);
280: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
281: VecRestoreArray(xin,&xx);
282: return(0);
283: }
287: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
288: {
289: Vec_Seq *vs = (Vec_Seq*)v->data;
293: PetscObjectSAWsViewOff(v);
294: #if defined(PETSC_USE_LOG)
295: PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
296: #endif
297: if (vs->array_allocated) PetscFree(vs->array_allocated);
298: PetscFree(vs);
299: return(0);
300: }
304: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
305: {
306: Vec_Seq *v = (Vec_Seq*)vin->data;
309: v->array = v->unplacedarray;
310: v->unplacedarray = 0;
311: return(0);
312: }
315: /*MC
316: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
318: Options Database Keys:
319: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
321: Level: beginner
323: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
324: M*/
329: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
330: {
331: const ViennaCLVector *xgpu;
332: ViennaCLVector *ygpu;
333: PetscErrorCode ierr;
336: VecViennaCLGetArrayRead(xin,&xgpu);
337: VecViennaCLGetArrayReadWrite(yin,&ygpu);
338: try {
339: if (alpha != 0.0 && xin->map->n > 0) {
340: *ygpu = *xgpu + alpha * *ygpu;
341: PetscLogFlops(2.0*yin->map->n);
342: } else {
343: *ygpu = *xgpu;
344: }
345: ViennaCLWaitForGPU();
346: } catch(std::exception const & ex) {
347: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
348: }
349: VecViennaCLRestoreArrayRead(xin,&xgpu);
350: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
351: return(0);
352: }
357: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
358: {
359: const ViennaCLVector *xgpu;
360: ViennaCLVector *ygpu;
361: PetscErrorCode ierr;
364: if (alpha != 0.0 && xin->map->n > 0) {
365: VecViennaCLGetArrayRead(xin,&xgpu);
366: VecViennaCLGetArrayReadWrite(yin,&ygpu);
367: try {
368: *ygpu += alpha * *xgpu;
369: ViennaCLWaitForGPU();
370: } catch(std::exception const & ex) {
371: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
372: }
373: VecViennaCLRestoreArrayRead(xin,&xgpu);
374: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
375: PetscLogFlops(2.0*yin->map->n);
376: }
377: return(0);
378: }
383: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
384: {
385: const ViennaCLVector *xgpu,*ygpu;
386: ViennaCLVector *wgpu;
387: PetscErrorCode ierr;
390: if (xin->map->n > 0) {
391: VecViennaCLGetArrayRead(xin,&xgpu);
392: VecViennaCLGetArrayRead(yin,&ygpu);
393: VecViennaCLGetArrayWrite(win,&wgpu);
394: try {
395: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
396: ViennaCLWaitForGPU();
397: } catch(std::exception const & ex) {
398: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
399: }
400: PetscLogFlops(win->map->n);
401: VecViennaCLRestoreArrayRead(xin,&xgpu);
402: VecViennaCLRestoreArrayRead(yin,&ygpu);
403: VecViennaCLRestoreArrayWrite(win,&wgpu);
404: }
405: return(0);
406: }
411: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
412: {
413: const ViennaCLVector *xgpu,*ygpu;
414: ViennaCLVector *wgpu;
415: PetscErrorCode ierr;
418: if (alpha == 0.0 && xin->map->n > 0) {
419: VecCopy_SeqViennaCL(yin,win);
420: } else {
421: VecViennaCLGetArrayRead(xin,&xgpu);
422: VecViennaCLGetArrayRead(yin,&ygpu);
423: VecViennaCLGetArrayWrite(win,&wgpu);
424: if (alpha == 1.0) {
425: try {
426: *wgpu = *ygpu + *xgpu;
427: } catch(std::exception const & ex) {
428: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
429: }
430: PetscLogFlops(win->map->n);
431: } else if (alpha == -1.0) {
432: try {
433: *wgpu = *ygpu - *xgpu;
434: } catch(std::exception const & ex) {
435: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
436: }
437: PetscLogFlops(win->map->n);
438: } else {
439: try {
440: *wgpu = *ygpu + alpha * *xgpu;
441: } catch(std::exception const & ex) {
442: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
443: }
444: PetscLogFlops(2*win->map->n);
445: }
446: ViennaCLWaitForGPU();
447: VecViennaCLRestoreArrayRead(xin,&xgpu);
448: VecViennaCLRestoreArrayRead(yin,&ygpu);
449: VecViennaCLRestoreArrayWrite(win,&wgpu);
450: }
451: return(0);
452: }
455: /*
456: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
457: *
458: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
459: * hence there is an iterated application of these until the final result is obtained
460: */
463: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
464: {
466: PetscInt j;
469: for (j = 0; j < nv; ++j) {
470: if (j+1 < nv) {
471: VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
472: ++j;
473: } else {
474: VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
475: }
476: }
477: ViennaCLWaitForGPU();
478: return(0);
479: }
484: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
485: {
486: const ViennaCLVector *xgpu,*ygpu;
487: PetscErrorCode ierr;
490: if (xin->map->n > 0) {
491: VecViennaCLGetArrayRead(xin,&xgpu);
492: VecViennaCLGetArrayRead(yin,&ygpu);
493: try {
494: *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
495: } catch(std::exception const & ex) {
496: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
497: }
498: if (xin->map->n >0) {
499: PetscLogFlops(2.0*xin->map->n-1);
500: }
501: ViennaCLWaitForGPU();
502: VecViennaCLRestoreArrayRead(xin,&xgpu);
503: VecViennaCLRestoreArrayRead(yin,&ygpu);
504: } else *z = 0.0;
505: return(0);
506: }
510: /*
511: * Operation z[j] = dot(x, y[j])
512: *
513: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
514: */
517: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
518: {
519: PetscErrorCode ierr;
520: PetscInt n = xin->map->n,i;
521: const ViennaCLVector *xgpu,*ygpu;
522: Vec *yyin = (Vec*)yin;
523: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
526: if (xin->map->n > 0) {
527: VecViennaCLGetArrayRead(xin,&xgpu);
528: for (i=0; i<nv; i++) {
529: VecViennaCLGetArrayRead(yyin[i],&ygpu);
530: ygpu_array[i] = ygpu;
531: }
533: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
534: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
536: for (i=0; i<nv; i++) {
537: viennacl::copy(result.begin(), result.end(), z);
538: VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
539: }
541: ViennaCLWaitForGPU();
542: VecViennaCLRestoreArrayRead(xin,&xgpu);
543: PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
544: } else {
545: for (i=0; i<nv; i++) z[i] = 0.0;
546: }
547: return(0);
548: }
554: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
555: {
556: ViennaCLVector *xgpu;
560: if (xin->map->n > 0) {
561: VecViennaCLGetArrayWrite(xin,&xgpu);
562: try {
563: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
564: ViennaCLWaitForGPU();
565: } catch(std::exception const & ex) {
566: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
567: }
568: VecViennaCLRestoreArrayWrite(xin,&xgpu);
569: }
570: return(0);
571: }
575: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
576: {
577: ViennaCLVector *xgpu;
581: if (alpha == 0.0 && xin->map->n > 0) {
582: VecSet_SeqViennaCL(xin,alpha);
583: PetscLogFlops(xin->map->n);
584: } else if (alpha != 1.0 && xin->map->n > 0) {
585: VecViennaCLGetArrayReadWrite(xin,&xgpu);
586: try {
587: *xgpu *= alpha;
588: ViennaCLWaitForGPU();
589: } catch(std::exception const & ex) {
590: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
591: }
592: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
593: PetscLogFlops(xin->map->n);
594: }
595: return(0);
596: }
601: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
602: {
606: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
607: VecDot_SeqViennaCL(xin, yin, z);
608: ViennaCLWaitForGPU();
609: return(0);
610: }
615: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
616: {
617: const ViennaCLVector *xgpu;
618: ViennaCLVector *ygpu;
619: PetscErrorCode ierr;
622: if (xin != yin && xin->map->n > 0) {
623: if (xin->valid_GPU_array == PETSC_VIENNACL_GPU) {
624: VecViennaCLGetArrayRead(xin,&xgpu);
625: VecViennaCLGetArrayWrite(yin,&ygpu);
626: try {
627: *ygpu = *xgpu;
628: ViennaCLWaitForGPU();
629: } catch(std::exception const & ex) {
630: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
631: }
632: VecViennaCLRestoreArrayRead(xin,&xgpu);
633: VecViennaCLRestoreArrayWrite(yin,&ygpu);
635: } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
636: /* copy in CPU if we are on the CPU*/
637: VecCopy_SeqViennaCL_Private(xin,yin);
638: ViennaCLWaitForGPU();
639: } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
640: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
641: if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
642: /* copy in CPU */
643: VecCopy_SeqViennaCL_Private(xin,yin);
644: ViennaCLWaitForGPU();
645: } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
646: /* copy in GPU */
647: VecViennaCLGetArrayRead(xin,&xgpu);
648: VecViennaCLGetArrayWrite(yin,&ygpu);
649: try {
650: *ygpu = *xgpu;
651: ViennaCLWaitForGPU();
652: } catch(std::exception const & ex) {
653: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654: }
655: VecViennaCLRestoreArrayRead(xin,&xgpu);
656: VecViennaCLRestoreArrayWrite(yin,&ygpu);
657: } else if (yin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
658: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
659: default to copy in GPU (this is an arbitrary choice) */
660: VecViennaCLGetArrayRead(xin,&xgpu);
661: VecViennaCLGetArrayWrite(yin,&ygpu);
662: try {
663: *ygpu = *xgpu;
664: ViennaCLWaitForGPU();
665: } catch(std::exception const & ex) {
666: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
667: }
668: VecViennaCLRestoreArrayRead(xin,&xgpu);
669: VecViennaCLRestoreArrayWrite(yin,&ygpu);
670: } else {
671: VecCopy_SeqViennaCL_Private(xin,yin);
672: ViennaCLWaitForGPU();
673: }
674: }
675: }
676: return(0);
677: }
682: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
683: {
685: ViennaCLVector *xgpu,*ygpu;
688: if (xin != yin && xin->map->n > 0) {
689: VecViennaCLGetArrayReadWrite(xin,&xgpu);
690: VecViennaCLGetArrayReadWrite(yin,&ygpu);
692: try {
693: viennacl::swap(*xgpu, *ygpu);
694: ViennaCLWaitForGPU();
695: } catch(std::exception const & ex) {
696: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
697: }
698: VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
699: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
700: }
701: return(0);
702: }
705: // y = alpha * x + beta * y
708: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
709: {
710: PetscErrorCode ierr;
711: PetscScalar a = alpha,b = beta;
712: const ViennaCLVector *xgpu;
713: ViennaCLVector *ygpu;
716: if (a == 0.0 && xin->map->n > 0) {
717: VecScale_SeqViennaCL(yin,beta);
718: } else if (b == 1.0 && xin->map->n > 0) {
719: VecAXPY_SeqViennaCL(yin,alpha,xin);
720: } else if (a == 1.0 && xin->map->n > 0) {
721: VecAYPX_SeqViennaCL(yin,beta,xin);
722: } else if (b == 0.0 && xin->map->n > 0) {
723: VecViennaCLGetArrayRead(xin,&xgpu);
724: VecViennaCLGetArrayReadWrite(yin,&ygpu);
725: try {
726: *ygpu = *xgpu * alpha;
727: ViennaCLWaitForGPU();
728: } catch(std::exception const & ex) {
729: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
730: }
731: PetscLogFlops(xin->map->n);
732: VecViennaCLRestoreArrayRead(xin,&xgpu);
733: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
734: } else if (xin->map->n > 0) {
735: VecViennaCLGetArrayRead(xin,&xgpu);
736: VecViennaCLGetArrayReadWrite(yin,&ygpu);
737: try {
738: *ygpu = *xgpu * alpha + *ygpu * beta;
739: ViennaCLWaitForGPU();
740: } catch(std::exception const & ex) {
741: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
742: }
743: VecViennaCLRestoreArrayRead(xin,&xgpu);
744: VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
745: PetscLogFlops(3.0*xin->map->n);
746: }
747: return(0);
748: }
751: /* operation z = alpha * x + beta *y + gamma *z*/
754: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
755: {
756: PetscErrorCode ierr;
757: PetscInt n = zin->map->n;
758: const ViennaCLVector *xgpu,*ygpu;
759: ViennaCLVector *zgpu;
762: VecViennaCLGetArrayRead(xin,&xgpu);
763: VecViennaCLGetArrayRead(yin,&ygpu);
764: VecViennaCLGetArrayReadWrite(zin,&zgpu);
765: if (alpha == 0.0 && xin->map->n > 0) {
766: try {
767: if (beta == 0.0) {
768: *zgpu = gamma * *zgpu;
769: ViennaCLWaitForGPU();
770: PetscLogFlops(1.0*n);
771: } else if (gamma == 0.0) {
772: *zgpu = beta * *ygpu;
773: ViennaCLWaitForGPU();
774: PetscLogFlops(1.0*n);
775: } else {
776: *zgpu = beta * *ygpu + gamma * *zgpu;
777: ViennaCLWaitForGPU();
778: PetscLogFlops(3.0*n);
779: }
780: } catch(std::exception const & ex) {
781: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
782: }
783: PetscLogFlops(3.0*n);
784: } else if (beta == 0.0 && xin->map->n > 0) {
785: try {
786: if (gamma == 0.0) {
787: *zgpu = alpha * *xgpu;
788: ViennaCLWaitForGPU();
789: PetscLogFlops(1.0*n);
790: } else {
791: *zgpu = alpha * *xgpu + gamma * *zgpu;
792: ViennaCLWaitForGPU();
793: PetscLogFlops(3.0*n);
794: }
795: } catch(std::exception const & ex) {
796: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
797: }
798: } else if (gamma == 0.0 && xin->map->n > 0) {
799: try {
800: *zgpu = alpha * *xgpu + beta * *ygpu;
801: ViennaCLWaitForGPU();
802: } catch(std::exception const & ex) {
803: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
804: }
805: PetscLogFlops(3.0*n);
806: } else if (xin->map->n > 0) {
807: try {
808: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
809: if (gamma != 1.0)
810: *zgpu *= gamma;
811: *zgpu += alpha * *xgpu + beta * *ygpu;
812: ViennaCLWaitForGPU();
813: } catch(std::exception const & ex) {
814: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
815: }
816: VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
817: VecViennaCLRestoreArrayRead(xin,&xgpu);
818: VecViennaCLRestoreArrayRead(yin,&ygpu);
819: PetscLogFlops(5.0*n);
820: }
821: return(0);
822: }
826: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
827: {
828: PetscErrorCode ierr;
829: PetscInt n = win->map->n;
830: const ViennaCLVector *xgpu,*ygpu;
831: ViennaCLVector *wgpu;
834: if (xin->map->n > 0) {
835: VecViennaCLGetArrayRead(xin,&xgpu);
836: VecViennaCLGetArrayRead(yin,&ygpu);
837: VecViennaCLGetArrayReadWrite(win,&wgpu);
838: try {
839: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
840: ViennaCLWaitForGPU();
841: } catch(std::exception const & ex) {
842: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
843: }
844: VecViennaCLRestoreArrayRead(xin,&xgpu);
845: VecViennaCLRestoreArrayRead(yin,&ygpu);
846: VecViennaCLRestoreArrayReadWrite(win,&wgpu);
847: PetscLogFlops(n);
848: }
849: return(0);
850: }
855: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
856: {
857: PetscErrorCode ierr;
858: PetscInt n = xin->map->n;
859: PetscBLASInt bn;
860: const ViennaCLVector *xgpu;
863: if (xin->map->n > 0) {
864: PetscBLASIntCast(n,&bn);
865: VecViennaCLGetArrayRead(xin,&xgpu);
866: if (type == NORM_2 || type == NORM_FROBENIUS) {
867: try {
868: *z = viennacl::linalg::norm_2(*xgpu);
869: ViennaCLWaitForGPU();
870: } catch(std::exception const & ex) {
871: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
872: }
873: PetscLogFlops(PetscMax(2.0*n-1,0.0));
874: } else if (type == NORM_INFINITY) {
875: VecViennaCLGetArrayRead(xin,&xgpu);
876: try {
877: *z = viennacl::linalg::norm_inf(*xgpu);
878: ViennaCLWaitForGPU();
879: } catch(std::exception const & ex) {
880: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
881: }
882: VecViennaCLRestoreArrayRead(xin,&xgpu);
883: } else if (type == NORM_1) {
884: try {
885: *z = viennacl::linalg::norm_1(*xgpu);
886: ViennaCLWaitForGPU();
887: } catch(std::exception const & ex) {
888: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
889: }
890: PetscLogFlops(PetscMax(n-1.0,0.0));
891: } else if (type == NORM_1_AND_2) {
892: try {
893: *z = viennacl::linalg::norm_1(*xgpu);
894: *(z+1) = viennacl::linalg::norm_2(*xgpu);
895: ViennaCLWaitForGPU();
896: } catch(std::exception const & ex) {
897: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
898: }
899: PetscLogFlops(PetscMax(2.0*n-1,0.0));
900: PetscLogFlops(PetscMax(n-1.0,0.0));
901: }
902: VecViennaCLRestoreArrayRead(xin,&xgpu);
903: } else if (type == NORM_1_AND_2) {
904: *z = 0.0;
905: *(z+1) = 0.0;
906: } else *z = 0.0;
907: return(0);
908: }
913: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
914: {
918: VecSetRandom_SeqViennaCL_Private(xin,r);
919: xin->valid_GPU_array = PETSC_VIENNACL_CPU;
920: return(0);
921: }
925: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
926: {
930: VecViennaCLCopyFromGPU(vin);
931: VecResetArray_SeqViennaCL_Private(vin);
932: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
933: return(0);
934: }
938: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
939: {
943: VecViennaCLCopyFromGPU(vin);
944: VecPlaceArray_Seq(vin,a);
945: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
946: return(0);
947: }
952: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
953: {
957: VecViennaCLCopyFromGPU(vin);
958: VecReplaceArray_Seq(vin,a);
959: vin->valid_GPU_array = PETSC_VIENNACL_CPU;
960: return(0);
961: }
966: /*@
967: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
969: Collective on MPI_Comm
971: Input Parameter:
972: + comm - the communicator, should be PETSC_COMM_SELF
973: - n - the vector length
975: Output Parameter:
976: . V - the vector
978: Notes:
979: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
980: same type as an existing vector.
982: Level: intermediate
984: Concepts: vectors^creating sequential
986: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
987: @*/
988: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
989: {
993: VecCreate(comm,v);
994: VecSetSizes(*v,n,n);
995: VecSetType(*v,VECSEQVIENNACL);
996: return(0);
997: }
1000: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1001: *
1002: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1003: */
1006: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1007: {
1008: PetscErrorCode ierr;
1011: VecDot_SeqViennaCL(s,t,dp);
1012: VecNorm_SeqViennaCL(t,NORM_2,nm);
1013: *nm *= *nm; //squared norm required
1014: return(0);
1015: }
1019: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1020: {
1024: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1025: PetscLayoutReference(win->map,&(*V)->map);
1026: PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1027: PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1028: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1029: return(0);
1030: }
1034: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1035: {
1039: try {
1040: if (v->spptr) {
1041: delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1042: delete (Vec_ViennaCL*) v->spptr;
1043: }
1044: } catch(char *ex) {
1045: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1046: }
1047: VecDestroy_SeqViennaCL_Private(v);
1048: return(0);
1049: }
1054: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1055: {
1057: PetscMPIInt size;
1060: MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1061: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1062: VecCreate_Seq_Private(V,0);
1063: PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1065: V->ops->dot = VecDot_SeqViennaCL;
1066: V->ops->norm = VecNorm_SeqViennaCL;
1067: V->ops->tdot = VecTDot_SeqViennaCL;
1068: V->ops->scale = VecScale_SeqViennaCL;
1069: V->ops->copy = VecCopy_SeqViennaCL;
1070: V->ops->set = VecSet_SeqViennaCL;
1071: V->ops->swap = VecSwap_SeqViennaCL;
1072: V->ops->axpy = VecAXPY_SeqViennaCL;
1073: V->ops->axpby = VecAXPBY_SeqViennaCL;
1074: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1075: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1076: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1077: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1078: V->ops->dot_local = VecDot_SeqViennaCL;
1079: V->ops->tdot_local = VecTDot_SeqViennaCL;
1080: V->ops->norm_local = VecNorm_SeqViennaCL;
1081: V->ops->mdot_local = VecMDot_SeqViennaCL;
1082: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1083: V->ops->mdot = VecMDot_SeqViennaCL;
1084: V->ops->aypx = VecAYPX_SeqViennaCL;
1085: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1086: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1087: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1088: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1089: V->ops->resetarray = VecResetArray_SeqViennaCL;
1090: V->ops->destroy = VecDestroy_SeqViennaCL;
1091: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1093: VecViennaCLAllocateCheck(V);
1094: V->valid_GPU_array = PETSC_VIENNACL_GPU;
1095: VecSet(V,0.0);
1096: return(0);
1097: }