Actual source code: vecviennacl.cxx

petsc-3.7.2 2016-06-05
Report Typos and Errors
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>          /*I "petscvec.h" I*/
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include <vector>

 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"
 16: #include "viennacl/ocl/backend.hpp"


 21: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
 22: {

 26:   *a   = 0;
 27:   VecViennaCLCopyToGPU(v);
 28:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 29:   ViennaCLWaitForGPU();
 30:   return(0);
 31: }

 35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
 36: {

 40:   v->valid_GPU_array = PETSC_VIENNACL_GPU;

 42:   PetscObjectStateIncrease((PetscObject)v);
 43:   return(0);
 44: }

 48: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 49: {

 53:   *a   = 0;
 54:   VecViennaCLCopyToGPU(v);
 55:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 56:   ViennaCLWaitForGPU();
 57:   return(0);
 58: }

 62: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 63: {
 65:   return(0);
 66: }

 70: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 71: {

 75:   *a   = 0;
 76:   VecViennaCLAllocateCheck(v);
 77:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 78:   ViennaCLWaitForGPU();
 79:   return(0);
 80: }

 84: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 85: {

 89:   v->valid_GPU_array = PETSC_VIENNACL_GPU;

 91:   PetscObjectStateIncrease((PetscObject)v);
 92:   return(0);
 93: }



 99: PETSC_EXTERN PetscErrorCode PetscObjectViennaCLSetFromOptions(PetscObject obj)
100: {
101:   PetscErrorCode       ierr;
102:   PetscBool            flg;

105:   PetscObjectOptionsBegin(obj);

107:   PetscOptionsHasName(NULL,NULL,"-viennacl_device_cpu",&flg);
108:   if (flg) {
109:     try {
110:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
111:     } catch (std::exception const & ex) {
112:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
113:     }
114:   }
115:   PetscOptionsHasName(NULL,NULL,"-viennacl_device_gpu",&flg);
116:   if (flg) {
117:     try {
118:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
119:     } catch (std::exception const & ex) {
120:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
121:     }
122:   }
123:   PetscOptionsHasName(NULL,NULL,"-viennacl_device_accelerator",&flg);
124:   if (flg) {
125:     try {
126:       viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
127:     } catch (std::exception const & ex) {
128:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
129:     }
130:   }

132:   PetscOptionsEnd();
133:   return(0);
134: }

138: /*
139:     Allocates space for the vector array on the Host if it does not exist.
140:     Does NOT change the PetscViennaCLFlag for the vector
141:     Does NOT zero the ViennaCL array
142:  */
143: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
144: {
146:   PetscScalar    *array;
147:   Vec_Seq        *s;
148:   PetscInt       n = v->map->n;

151:   s    = (Vec_Seq*)v->data;
152:   VecViennaCLAllocateCheck(v);
153:   if (s->array == 0) {
154:     PetscMalloc1(n,&array);
155:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
156:     s->array           = array;
157:     s->array_allocated = array;
158:   }
159:   return(0);
160: }


165: /*
166:     Allocates space for the vector array on the GPU if it does not exist.
167:     Does NOT change the PetscViennaCLFlag for the vector
168:     Does NOT zero the ViennaCL array

170:  */
171: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
172: {
174:   int            rank;

177:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
178:   // First allocate memory on the GPU if needed
179:   if (!v->spptr) {
180:     try {
181:       PetscObjectViennaCLSetFromOptions((PetscObject)v);
182:       v->spptr                            = new Vec_ViennaCL;
183:       ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);

185:     } catch(std::exception const & ex) {
186:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
187:     }
188:   }
189:   return(0);
190: }


195: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
196: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
197: {

201:   VecViennaCLAllocateCheck(v);
202:   if (v->map->n > 0) {
203:     if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
204:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
205:       try {
206:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
207:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
208:         ViennaCLWaitForGPU();
209:       } catch(std::exception const & ex) {
210:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
211:       }
212:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
213:       v->valid_GPU_array = PETSC_VIENNACL_BOTH;
214:     }
215:   }
216:   return(0);
217: }



223: /*
224:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
225: */
226: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
227: {

231:   VecViennaCLAllocateCheckHost(v);
232:   if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
233:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
234:     try {
235:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
236:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
237:       ViennaCLWaitForGPU();
238:     } catch(std::exception const & ex) {
239:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
240:     }
241:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
242:     v->valid_GPU_array = PETSC_VIENNACL_BOTH;
243:   }
244:   return(0);
245: }


248: /* Copy on CPU */
251: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
252: {
253:   PetscScalar       *ya;
254:   const PetscScalar *xa;
255:   PetscErrorCode    ierr;

258:   VecViennaCLAllocateCheckHost(xin);
259:   VecViennaCLAllocateCheckHost(yin);
260:   if (xin != yin) {
261:     VecGetArrayRead(xin,&xa);
262:     VecGetArray(yin,&ya);
263:     PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
264:     VecRestoreArrayRead(xin,&xa);
265:     VecRestoreArray(yin,&ya);
266:   }
267:   return(0);
268: }

272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
273: {
275:   PetscInt       n = xin->map->n,i;
276:   PetscScalar    *xx;

279:   VecGetArray(xin,&xx);
280:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
281:   VecRestoreArray(xin,&xx);
282:   return(0);
283: }

287: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
288: {
289:   Vec_Seq        *vs = (Vec_Seq*)v->data;

293:   PetscObjectSAWsViewOff(v);
294: #if defined(PETSC_USE_LOG)
295:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
296: #endif
297:   if (vs->array_allocated) PetscFree(vs->array_allocated);
298:   PetscFree(vs);
299:   return(0);
300: }

304: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
305: {
306:   Vec_Seq *v = (Vec_Seq*)vin->data;

309:   v->array         = v->unplacedarray;
310:   v->unplacedarray = 0;
311:   return(0);
312: }


315: /*MC
316:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

318:    Options Database Keys:
319: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

321:   Level: beginner

323: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
324: M*/


329: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
330: {
331:   const ViennaCLVector  *xgpu;
332:   ViennaCLVector        *ygpu;
333:   PetscErrorCode        ierr;

336:   VecViennaCLGetArrayRead(xin,&xgpu);
337:   VecViennaCLGetArrayReadWrite(yin,&ygpu);
338:   try {
339:     if (alpha != 0.0 && xin->map->n > 0) {
340:       *ygpu = *xgpu + alpha * *ygpu;
341:       PetscLogFlops(2.0*yin->map->n);
342:     } else {
343:       *ygpu = *xgpu;
344:     }
345:     ViennaCLWaitForGPU();
346:   } catch(std::exception const & ex) {
347:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
348:   }
349:   VecViennaCLRestoreArrayRead(xin,&xgpu);
350:   VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
351:   return(0);
352: }


357: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
358: {
359:   const ViennaCLVector  *xgpu;
360:   ViennaCLVector        *ygpu;
361:   PetscErrorCode        ierr;

364:   if (alpha != 0.0 && xin->map->n > 0) {
365:     VecViennaCLGetArrayRead(xin,&xgpu);
366:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
367:     try {
368:       *ygpu += alpha * *xgpu;
369:       ViennaCLWaitForGPU();
370:     } catch(std::exception const & ex) {
371:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
372:     }
373:     VecViennaCLRestoreArrayRead(xin,&xgpu);
374:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
375:     PetscLogFlops(2.0*yin->map->n);
376:   }
377:   return(0);
378: }


383: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
384: {
385:   const ViennaCLVector  *xgpu,*ygpu;
386:   ViennaCLVector        *wgpu;
387:   PetscErrorCode        ierr;

390:   if (xin->map->n > 0) {
391:     VecViennaCLGetArrayRead(xin,&xgpu);
392:     VecViennaCLGetArrayRead(yin,&ygpu);
393:     VecViennaCLGetArrayWrite(win,&wgpu);
394:     try {
395:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
396:       ViennaCLWaitForGPU();
397:     } catch(std::exception const & ex) {
398:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
399:     }
400:     PetscLogFlops(win->map->n);
401:     VecViennaCLRestoreArrayRead(xin,&xgpu);
402:     VecViennaCLRestoreArrayRead(yin,&ygpu);
403:     VecViennaCLRestoreArrayWrite(win,&wgpu);
404:   }
405:   return(0);
406: }


411: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
412: {
413:   const ViennaCLVector  *xgpu,*ygpu;
414:   ViennaCLVector        *wgpu;
415:   PetscErrorCode        ierr;

418:   if (alpha == 0.0 && xin->map->n > 0) {
419:     VecCopy_SeqViennaCL(yin,win);
420:   } else {
421:     VecViennaCLGetArrayRead(xin,&xgpu);
422:     VecViennaCLGetArrayRead(yin,&ygpu);
423:     VecViennaCLGetArrayWrite(win,&wgpu);
424:     if (alpha == 1.0) {
425:       try {
426:         *wgpu = *ygpu + *xgpu;
427:       } catch(std::exception const & ex) {
428:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
429:       }
430:       PetscLogFlops(win->map->n);
431:     } else if (alpha == -1.0) {
432:       try {
433:         *wgpu = *ygpu - *xgpu;
434:       } catch(std::exception const & ex) {
435:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
436:       }
437:       PetscLogFlops(win->map->n);
438:     } else {
439:       try {
440:         *wgpu = *ygpu + alpha * *xgpu;
441:       } catch(std::exception const & ex) {
442:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
443:       }
444:       PetscLogFlops(2*win->map->n);
445:     }
446:     ViennaCLWaitForGPU();
447:     VecViennaCLRestoreArrayRead(xin,&xgpu);
448:     VecViennaCLRestoreArrayRead(yin,&ygpu);
449:     VecViennaCLRestoreArrayWrite(win,&wgpu);
450:   }
451:   return(0);
452: }


455: /*
456:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
457:  *
458:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
459:  * hence there is an iterated application of these until the final result is obtained
460:  */
463: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
464: {
466:   PetscInt       j;

469:   for (j = 0; j < nv; ++j) {
470:     if (j+1 < nv) {
471:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
472:       ++j;
473:     } else {
474:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
475:     }
476:   }
477:   ViennaCLWaitForGPU();
478:   return(0);
479: }


484: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
485: {
486:   const ViennaCLVector  *xgpu,*ygpu;
487:   PetscErrorCode        ierr;

490:   if (xin->map->n > 0) {
491:     VecViennaCLGetArrayRead(xin,&xgpu);
492:     VecViennaCLGetArrayRead(yin,&ygpu);
493:     try {
494:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
495:     } catch(std::exception const & ex) {
496:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
497:     }
498:     if (xin->map->n >0) {
499:       PetscLogFlops(2.0*xin->map->n-1);
500:     }
501:     ViennaCLWaitForGPU();
502:     VecViennaCLRestoreArrayRead(xin,&xgpu);
503:     VecViennaCLRestoreArrayRead(yin,&ygpu);
504:   } else *z = 0.0;
505:   return(0);
506: }



510: /*
511:  * Operation z[j] = dot(x, y[j])
512:  *
513:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
514:  */
517: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
518: {
519:   PetscErrorCode       ierr;
520:   PetscInt             n = xin->map->n,i;
521:   const ViennaCLVector *xgpu,*ygpu;
522:   Vec                  *yyin = (Vec*)yin;
523:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

526:   if (xin->map->n > 0) {
527:     VecViennaCLGetArrayRead(xin,&xgpu);
528:     for (i=0; i<nv; i++) {
529:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
530:       ygpu_array[i] = ygpu;
531:     }

533:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
534:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);

536:     for (i=0; i<nv; i++) {
537:       viennacl::copy(result.begin(), result.end(), z);
538:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
539:     }

541:     ViennaCLWaitForGPU();
542:     VecViennaCLRestoreArrayRead(xin,&xgpu);
543:     PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
544:   } else {
545:     for (i=0; i<nv; i++) z[i] = 0.0;
546:   }
547:   return(0);
548: }



554: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
555: {
556:   ViennaCLVector *xgpu;

560:   if (xin->map->n > 0) {
561:     VecViennaCLGetArrayWrite(xin,&xgpu);
562:     try {
563:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
564:       ViennaCLWaitForGPU();
565:     } catch(std::exception const & ex) {
566:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
567:     }
568:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
569:   }
570:   return(0);
571: }

575: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
576: {
577:   ViennaCLVector *xgpu;

581:   if (alpha == 0.0 && xin->map->n > 0) {
582:     VecSet_SeqViennaCL(xin,alpha);
583:     PetscLogFlops(xin->map->n);
584:   } else if (alpha != 1.0 && xin->map->n > 0) {
585:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
586:     try {
587:       *xgpu *= alpha;
588:       ViennaCLWaitForGPU();
589:     } catch(std::exception const & ex) {
590:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
591:     }
592:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
593:     PetscLogFlops(xin->map->n);
594:   }
595:   return(0);
596: }


601: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
602: {

606:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
607:   VecDot_SeqViennaCL(xin, yin, z);
608:   ViennaCLWaitForGPU();
609:   return(0);
610: }


615: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
616: {
617:   const ViennaCLVector *xgpu;
618:   ViennaCLVector       *ygpu;
619:   PetscErrorCode       ierr;

622:   if (xin != yin && xin->map->n > 0) {
623:     if (xin->valid_GPU_array == PETSC_VIENNACL_GPU) {
624:       VecViennaCLGetArrayRead(xin,&xgpu);
625:       VecViennaCLGetArrayWrite(yin,&ygpu);
626:       try {
627:         *ygpu = *xgpu;
628:         ViennaCLWaitForGPU();
629:       } catch(std::exception const & ex) {
630:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
631:       }
632:       VecViennaCLRestoreArrayRead(xin,&xgpu);
633:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

635:     } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
636:       /* copy in CPU if we are on the CPU*/
637:       VecCopy_SeqViennaCL_Private(xin,yin);
638:       ViennaCLWaitForGPU();
639:     } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
640:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
641:       if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
642:         /* copy in CPU */
643:         VecCopy_SeqViennaCL_Private(xin,yin);
644:         ViennaCLWaitForGPU();
645:       } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
646:         /* copy in GPU */
647:         VecViennaCLGetArrayRead(xin,&xgpu);
648:         VecViennaCLGetArrayWrite(yin,&ygpu);
649:         try {
650:           *ygpu = *xgpu;
651:           ViennaCLWaitForGPU();
652:         } catch(std::exception const & ex) {
653:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654:         }
655:         VecViennaCLRestoreArrayRead(xin,&xgpu);
656:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
657:       } else if (yin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
658:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
659:            default to copy in GPU (this is an arbitrary choice) */
660:         VecViennaCLGetArrayRead(xin,&xgpu);
661:         VecViennaCLGetArrayWrite(yin,&ygpu);
662:         try {
663:           *ygpu = *xgpu;
664:           ViennaCLWaitForGPU();
665:         } catch(std::exception const & ex) {
666:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
667:         }
668:         VecViennaCLRestoreArrayRead(xin,&xgpu);
669:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
670:       } else {
671:         VecCopy_SeqViennaCL_Private(xin,yin);
672:         ViennaCLWaitForGPU();
673:       }
674:     }
675:   }
676:   return(0);
677: }


682: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
683: {
685:   ViennaCLVector *xgpu,*ygpu;

688:   if (xin != yin && xin->map->n > 0) {
689:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
690:     VecViennaCLGetArrayReadWrite(yin,&ygpu);

692:     try {
693:       viennacl::swap(*xgpu, *ygpu);
694:       ViennaCLWaitForGPU();
695:     } catch(std::exception const & ex) {
696:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
697:     }
698:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
699:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
700:   }
701:   return(0);
702: }


705: // y = alpha * x + beta * y
708: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
709: {
710:   PetscErrorCode       ierr;
711:   PetscScalar          a = alpha,b = beta;
712:   const ViennaCLVector *xgpu;
713:   ViennaCLVector       *ygpu;

716:   if (a == 0.0 && xin->map->n > 0) {
717:     VecScale_SeqViennaCL(yin,beta);
718:   } else if (b == 1.0 && xin->map->n > 0) {
719:     VecAXPY_SeqViennaCL(yin,alpha,xin);
720:   } else if (a == 1.0 && xin->map->n > 0) {
721:     VecAYPX_SeqViennaCL(yin,beta,xin);
722:   } else if (b == 0.0 && xin->map->n > 0) {
723:     VecViennaCLGetArrayRead(xin,&xgpu);
724:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
725:     try {
726:       *ygpu = *xgpu * alpha;
727:       ViennaCLWaitForGPU();
728:     } catch(std::exception const & ex) {
729:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
730:     }
731:     PetscLogFlops(xin->map->n);
732:     VecViennaCLRestoreArrayRead(xin,&xgpu);
733:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
734:   } else if (xin->map->n > 0) {
735:     VecViennaCLGetArrayRead(xin,&xgpu);
736:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
737:     try {
738:       *ygpu = *xgpu * alpha + *ygpu * beta;
739:       ViennaCLWaitForGPU();
740:     } catch(std::exception const & ex) {
741:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
742:     }
743:     VecViennaCLRestoreArrayRead(xin,&xgpu);
744:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
745:     PetscLogFlops(3.0*xin->map->n);
746:   }
747:   return(0);
748: }


751: /* operation  z = alpha * x + beta *y + gamma *z*/
754: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
755: {
756:   PetscErrorCode       ierr;
757:   PetscInt             n = zin->map->n;
758:   const ViennaCLVector *xgpu,*ygpu;
759:   ViennaCLVector       *zgpu;

762:   VecViennaCLGetArrayRead(xin,&xgpu);
763:   VecViennaCLGetArrayRead(yin,&ygpu);
764:   VecViennaCLGetArrayReadWrite(zin,&zgpu);
765:   if (alpha == 0.0 && xin->map->n > 0) {
766:     try {
767:       if (beta == 0.0) {
768:         *zgpu = gamma * *zgpu;
769:         ViennaCLWaitForGPU();
770:         PetscLogFlops(1.0*n);
771:       } else if (gamma == 0.0) {
772:         *zgpu = beta * *ygpu;
773:         ViennaCLWaitForGPU();
774:         PetscLogFlops(1.0*n);
775:       } else {
776:         *zgpu = beta * *ygpu + gamma * *zgpu;
777:         ViennaCLWaitForGPU();
778:         PetscLogFlops(3.0*n);
779:       }
780:     } catch(std::exception const & ex) {
781:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
782:     }
783:     PetscLogFlops(3.0*n);
784:   } else if (beta == 0.0 && xin->map->n > 0) {
785:     try {
786:       if (gamma == 0.0) {
787:         *zgpu = alpha * *xgpu;
788:         ViennaCLWaitForGPU();
789:         PetscLogFlops(1.0*n);
790:       } else {
791:         *zgpu = alpha * *xgpu + gamma * *zgpu;
792:         ViennaCLWaitForGPU();
793:         PetscLogFlops(3.0*n);
794:       }
795:     } catch(std::exception const & ex) {
796:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
797:     }
798:   } else if (gamma == 0.0 && xin->map->n > 0) {
799:     try {
800:       *zgpu = alpha * *xgpu + beta * *ygpu;
801:       ViennaCLWaitForGPU();
802:     } catch(std::exception const & ex) {
803:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
804:     }
805:     PetscLogFlops(3.0*n);
806:   } else if (xin->map->n > 0) {
807:     try {
808:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
809:       if (gamma != 1.0)
810:         *zgpu *= gamma;
811:       *zgpu += alpha * *xgpu + beta * *ygpu;
812:       ViennaCLWaitForGPU();
813:     } catch(std::exception const & ex) {
814:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
815:     }
816:     VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
817:     VecViennaCLRestoreArrayRead(xin,&xgpu);
818:     VecViennaCLRestoreArrayRead(yin,&ygpu);
819:     PetscLogFlops(5.0*n);
820:   }
821:   return(0);
822: }

826: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
827: {
828:   PetscErrorCode       ierr;
829:   PetscInt             n = win->map->n;
830:   const ViennaCLVector *xgpu,*ygpu;
831:   ViennaCLVector       *wgpu;

834:   if (xin->map->n > 0) {
835:     VecViennaCLGetArrayRead(xin,&xgpu);
836:     VecViennaCLGetArrayRead(yin,&ygpu);
837:     VecViennaCLGetArrayReadWrite(win,&wgpu);
838:     try {
839:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
840:       ViennaCLWaitForGPU();
841:     } catch(std::exception const & ex) {
842:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
843:     }
844:     VecViennaCLRestoreArrayRead(xin,&xgpu);
845:     VecViennaCLRestoreArrayRead(yin,&ygpu);
846:     VecViennaCLRestoreArrayReadWrite(win,&wgpu);
847:     PetscLogFlops(n);
848:   }
849:   return(0);
850: }


855: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
856: {
857:   PetscErrorCode       ierr;
858:   PetscInt             n = xin->map->n;
859:   PetscBLASInt         bn;
860:   const ViennaCLVector *xgpu;

863:   if (xin->map->n > 0) {
864:     PetscBLASIntCast(n,&bn);
865:     VecViennaCLGetArrayRead(xin,&xgpu);
866:     if (type == NORM_2 || type == NORM_FROBENIUS) {
867:       try {
868:         *z = viennacl::linalg::norm_2(*xgpu);
869:         ViennaCLWaitForGPU();
870:       } catch(std::exception const & ex) {
871:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
872:       }
873:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
874:     } else if (type == NORM_INFINITY) {
875:       VecViennaCLGetArrayRead(xin,&xgpu);
876:       try {
877:         *z = viennacl::linalg::norm_inf(*xgpu);
878:         ViennaCLWaitForGPU();
879:       } catch(std::exception const & ex) {
880:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
881:       }
882:       VecViennaCLRestoreArrayRead(xin,&xgpu);
883:     } else if (type == NORM_1) {
884:       try {
885:         *z = viennacl::linalg::norm_1(*xgpu);
886:         ViennaCLWaitForGPU();
887:       } catch(std::exception const & ex) {
888:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
889:       }
890:       PetscLogFlops(PetscMax(n-1.0,0.0));
891:     } else if (type == NORM_1_AND_2) {
892:       try {
893:         *z     = viennacl::linalg::norm_1(*xgpu);
894:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
895:         ViennaCLWaitForGPU();
896:       } catch(std::exception const & ex) {
897:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
898:       }
899:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
900:       PetscLogFlops(PetscMax(n-1.0,0.0));
901:     }
902:     VecViennaCLRestoreArrayRead(xin,&xgpu);
903:   } else if (type == NORM_1_AND_2) {
904:     *z      = 0.0;
905:     *(z+1)  = 0.0;
906:   } else *z = 0.0;
907:   return(0);
908: }


913: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
914: {

918:   VecSetRandom_SeqViennaCL_Private(xin,r);
919:   xin->valid_GPU_array = PETSC_VIENNACL_CPU;
920:   return(0);
921: }

925: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
926: {

930:   VecViennaCLCopyFromGPU(vin);
931:   VecResetArray_SeqViennaCL_Private(vin);
932:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
933:   return(0);
934: }

938: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
939: {

943:   VecViennaCLCopyFromGPU(vin);
944:   VecPlaceArray_Seq(vin,a);
945:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
946:   return(0);
947: }


952: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
953: {

957:   VecViennaCLCopyFromGPU(vin);
958:   VecReplaceArray_Seq(vin,a);
959:   vin->valid_GPU_array = PETSC_VIENNACL_CPU;
960:   return(0);
961: }


966: /*@
967:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

969:    Collective on MPI_Comm

971:    Input Parameter:
972: +  comm - the communicator, should be PETSC_COMM_SELF
973: -  n - the vector length

975:    Output Parameter:
976: .  V - the vector

978:    Notes:
979:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
980:    same type as an existing vector.

982:    Level: intermediate

984:    Concepts: vectors^creating sequential

986: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
987: @*/
988: PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
989: {

993:   VecCreate(comm,v);
994:   VecSetSizes(*v,n,n);
995:   VecSetType(*v,VECSEQVIENNACL);
996:   return(0);
997: }


1000: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1001:  *
1002:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1003:  */
1006: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1007: {
1008:   PetscErrorCode                         ierr;

1011:   VecDot_SeqViennaCL(s,t,dp);
1012:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1013:   *nm *= *nm; //squared norm required
1014:   return(0);
1015: }

1019: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1020: {

1024:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1025:   PetscLayoutReference(win->map,&(*V)->map);
1026:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1027:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1028:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1029:   return(0);
1030: }

1034: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1035: {

1039:   try {
1040:     if (v->spptr) {
1041:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1042:       delete (Vec_ViennaCL*) v->spptr;
1043:     }
1044:   } catch(char *ex) {
1045:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1046:   }
1047:   VecDestroy_SeqViennaCL_Private(v);
1048:   return(0);
1049: }


1054: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1055: {
1057:   PetscMPIInt    size;

1060:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1061:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1062:   VecCreate_Seq_Private(V,0);
1063:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1065:   V->ops->dot             = VecDot_SeqViennaCL;
1066:   V->ops->norm            = VecNorm_SeqViennaCL;
1067:   V->ops->tdot            = VecTDot_SeqViennaCL;
1068:   V->ops->scale           = VecScale_SeqViennaCL;
1069:   V->ops->copy            = VecCopy_SeqViennaCL;
1070:   V->ops->set             = VecSet_SeqViennaCL;
1071:   V->ops->swap            = VecSwap_SeqViennaCL;
1072:   V->ops->axpy            = VecAXPY_SeqViennaCL;
1073:   V->ops->axpby           = VecAXPBY_SeqViennaCL;
1074:   V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1075:   V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1076:   V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1077:   V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1078:   V->ops->dot_local       = VecDot_SeqViennaCL;
1079:   V->ops->tdot_local      = VecTDot_SeqViennaCL;
1080:   V->ops->norm_local      = VecNorm_SeqViennaCL;
1081:   V->ops->mdot_local      = VecMDot_SeqViennaCL;
1082:   V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1083:   V->ops->mdot            = VecMDot_SeqViennaCL;
1084:   V->ops->aypx            = VecAYPX_SeqViennaCL;
1085:   V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1086:   V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1087:   V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1088:   V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1089:   V->ops->resetarray      = VecResetArray_SeqViennaCL;
1090:   V->ops->destroy         = VecDestroy_SeqViennaCL;
1091:   V->ops->duplicate       = VecDuplicate_SeqViennaCL;

1093:   VecViennaCLAllocateCheck(V);
1094:   V->valid_GPU_array      = PETSC_VIENNACL_GPU;
1095:   VecSet(V,0.0);
1096:   return(0);
1097: }