Actual source code: bvec1.c
petsc-3.4.2 2013-07-02
/*
   Defines the BLAS based vector operations. Code shared by parallel
   and sequential vectors.
*/

#include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
#include <petscblaslapack.h>
#include <petscthreadcomm.h>
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommReduction red)
{
  PetscErrorCode    ierr;
  PetscInt          *trstarts=xin->map->trstarts;
  PetscInt          start,end,n;
  PetscBLASInt      one = 1,bn;
  const PetscScalar *xa,*ya;
  PetscScalar       z_loc;

  VecGetArrayRead(xin,&xa);
  VecGetArrayRead(yin,&ya);
  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end-start;
  PetscBLASIntCast(n,&bn);
  /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
  PetscStackCallBLAS("BLASdot",z_loc = BLASdot_(&bn,ya+start,&one,xa+start,&one));

  PetscThreadReductionKernelPost(thread_id,red,(void*)&z_loc);

  VecRestoreArrayRead(xin,&xa);
  VecRestoreArrayRead(yin,&ya);
  return 0;
}

PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  PetscErrorCode           ierr;
  PetscThreadCommReduction red;

  PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,1,&red);
  PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecDot_kernel,xin,yin,red);
  PetscThreadReductionEnd(red,z);
  if (xin->map->n > 0) {
    PetscLogFlops(2.0*xin->map->n-1);
  }
  return(0);
}
#else
PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  const PetscScalar *ya,*xa;
  PetscBLASInt      one = 1,bn;
  PetscErrorCode    ierr;

  PetscBLASIntCast(xin->map->n,&bn);
  VecGetArrayRead(xin,&xa);
  VecGetArrayRead(yin,&ya);
  /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
  PetscStackCallBLAS("BLASdot",*z = BLASdot_(&bn,ya,&one,xa,&one));
  VecRestoreArrayRead(xin,&xa);
  VecRestoreArrayRead(yin,&ya);
  if (xin->map->n > 0) {
    PetscLogFlops(2.0*xin->map->n-1);
  }
  return(0);
}
#endif
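
The two VecDot_Seq variants differ only in whether the reduction is partitioned over a thread communicator; both ultimately reach the conjugating BLAS dot product, with the arguments swapped so that PETSc's convention (conjugate the second argument) is preserved. The driver below is a sketch, not part of bvec1.c: the vector length and the values are arbitrary, and it simply reaches these routines through the public VecDot() interface on a VECSEQ vector.

#include <petscvec.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  Vec            x,y;
  PetscScalar    dot;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = VecCreateSeq(PETSC_COMM_SELF,10,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,2.0);CHKERRQ(ierr);
  ierr = VecSet(y,3.0);CHKERRQ(ierr);
  /* dot = y^H x = sum_i x_i * conj(y_i); for a VECSEQ vector this dispatches to VecDot_Seq */
  ierr = VecDot(x,y,&dot);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"dot = %g\n",(double)PetscRealPart(dot));CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}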
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecTDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommReduction red)
{
  PetscErrorCode    ierr;
  PetscInt          *trstarts=xin->map->trstarts;
  PetscInt          start,end,n;
  PetscBLASInt      one=1,bn;
  const PetscScalar *xa,*ya;
  PetscScalar       z_loc;

  VecGetArrayRead(xin,&xa);
  VecGetArrayRead(yin,&ya);
  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end-start;
  PetscBLASIntCast(n,&bn);
  PetscStackCallBLAS("BLASdot",z_loc = BLASdotu_(&bn,xa+start,&one,ya+start,&one));

  PetscThreadReductionKernelPost(thread_id,red,(void*)&z_loc);

  VecRestoreArrayRead(xin,&xa);
  VecRestoreArrayRead(yin,&ya);
  return 0;
}

PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  PetscErrorCode           ierr;
  PetscThreadCommReduction red;

  PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,1,&red);
  PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecTDot_kernel,xin,yin,red);
  PetscThreadReductionEnd(red,z);
  if (xin->map->n > 0) {
    PetscLogFlops(2.0*xin->map->n-1);
  }
  return(0);
}
#else
PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  const PetscScalar *ya,*xa;
  PetscBLASInt      one = 1,bn;
  PetscErrorCode    ierr;

  PetscBLASIntCast(xin->map->n,&bn);
  VecGetArrayRead(xin,&xa);
  VecGetArrayRead(yin,&ya);
  PetscStackCallBLAS("BLASdot",*z = BLASdotu_(&bn,xa,&one,ya,&one));
  VecRestoreArrayRead(xin,&xa);
  VecRestoreArrayRead(yin,&ya);
  if (xin->map->n > 0) {
    PetscLogFlops(2.0*xin->map->n-1);
  }
  return(0);
}
#endif
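
VecTDot_Seq is the unconjugated counterpart: it calls BLASdotu_ with xa first, so no argument swap is needed, and the distinction only matters for complex builds. The helper below is a sketch (the function name CompareDots is illustrative, not PETSc API) that contrasts the two public entry points on vectors x and y assumed to be already created.

#include <petscvec.h>

/* Sketch: with real scalars the two results coincide; with complex scalars
   VecDot conjugates its second argument while VecTDot does not. */
static PetscErrorCode CompareDots(Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscScalar    d,t;

  ierr = VecDot(x,y,&d);CHKERRQ(ierr);   /* d = sum_i x_i * conj(y_i) */
  ierr = VecTDot(x,y,&t);CHKERRQ(ierr);  /* t = sum_i x_i * y_i       */
  ierr = PetscPrintf(PETSC_COMM_SELF,"VecDot %g  VecTDot %g\n",(double)PetscRealPart(d),(double)PetscRealPart(t));CHKERRQ(ierr);
  return 0;
}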
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecScale_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
{
  PetscScalar  *xx;
  PetscInt     start,end,n;
  PetscBLASInt one=1,bn;
  PetscInt     *trstarts=xin->map->trstarts;
  PetscScalar  alpha=*alpha_p;

  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end-start;
  VecGetArray(xin,&xx);
  PetscBLASIntCast(n,&bn);
  PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&alpha,xx+start,&one));
  VecRestoreArray(xin,&xx);
  return 0;
}

PetscErrorCode VecScale_Seq(Vec xin,PetscScalar alpha)
{
  if (alpha == (PetscScalar)0.0) {
    VecSet_Seq(xin,alpha);
  } else if (alpha != (PetscScalar)1.0) {
    PetscScalar *scalar;
    PetscThreadCommGetScalars(PetscObjectComm((PetscObject)xin),&scalar,NULL,NULL);
    *scalar = alpha;
    PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecScale_kernel,xin,scalar);
  }
  PetscLogFlops(xin->map->n);
  return(0);
}
#else
PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
{
  PetscBLASInt one = 1,bn;

  PetscBLASIntCast(xin->map->n,&bn);
  if (alpha == (PetscScalar)0.0) {
    VecSet_Seq(xin,alpha);
  } else if (alpha != (PetscScalar)1.0) {
    PetscScalar a = alpha,*xarray;
    VecGetArray(xin,&xarray);
    PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
    VecRestoreArray(xin,&xarray);
  }
  PetscLogFlops(xin->map->n);
  return(0);
}
#endif
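
Both VecScale_Seq variants short-circuit the trivial cases: alpha == 0 is delegated to VecSet_Seq and alpha == 1 does nothing, so BLASscal_ only runs for a genuine scaling; n flops are logged unconditionally. The plain-C helper below is a hypothetical illustration (scale_array is not a PETSc routine) of what the operation amounts to without BLAS.

#include <petscsys.h>

/* Sketch: x[i] *= alpha in place, with the same alpha == 0 and alpha == 1
   short-circuits as VecScale_Seq. */
static void scale_array(PetscScalar *x,PetscInt n,PetscScalar alpha)
{
  PetscInt i;

  if (alpha == (PetscScalar)0.0) {
    for (i=0; i<n; i++) x[i] = 0.0;     /* the VecSet_Seq branch          */
  } else if (alpha != (PetscScalar)1.0) {
    for (i=0; i<n; i++) x[i] *= alpha;  /* what BLASscal_ does in place   */
  }                                     /* alpha == 1: nothing to do      */
}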
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecAXPY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xarray;
  PetscScalar       *yarray;
  PetscBLASInt      one=1,bn;
  PetscInt          *trstarts=yin->map->trstarts,start,end,n;
  PetscScalar       alpha = *alpha_p;

  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end - start;
  PetscBLASIntCast(n,&bn);
  VecGetArrayRead(xin,&xarray);
  VecGetArray(yin,&yarray);
  PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray+start,&one,yarray+start,&one));
  VecRestoreArrayRead(xin,&xarray);
  VecRestoreArray(yin,&yarray);

  return 0;
}

PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
{
  /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
  if (alpha != (PetscScalar)0.0) {
    PetscScalar *scalar;
    PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scalar,NULL,NULL);
    *scalar = alpha;
    PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAXPY_kernel,yin,scalar,xin);
    PetscLogFlops(2.0*yin->map->n);
  }
  return(0);
}
#else
PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xarray;
  PetscScalar       *yarray;
  PetscBLASInt      one = 1,bn;

  PetscBLASIntCast(yin->map->n,&bn);
  /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
  if (alpha != (PetscScalar)0.0) {
    VecGetArrayRead(xin,&xarray);
    VecGetArray(yin,&yarray);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
    VecRestoreArrayRead(xin,&xarray);
    VecRestoreArray(yin,&yarray);
    PetscLogFlops(2.0*yin->map->n);
  }
  return(0);
}
#endif
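
VecAXPY_Seq computes yin <- yin + alpha*xin. The alpha == 0 case is skipped entirely (no work, no flops logged); every other alpha, including 1, goes to BLASaxpy_, and one multiply plus one add per entry accounts for the 2n flops logged. A short usage sketch follows; the helper name AddScaled and the factor 2.5 are illustrative only.

#include <petscvec.h>

/* Sketch: w <- w + 2.5*v through the public interface; for a VECSEQ vector
   this dispatches to VecAXPY_Seq. */
static PetscErrorCode AddScaled(Vec w,Vec v)
{
  PetscErrorCode ierr;

  ierr = VecAXPY(w,2.5,v);CHKERRQ(ierr);  /* w[i] += 2.5*v[i]; 2n flops logged */
  return 0;
}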
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecAXPBY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,PetscScalar *beta_p,Vec xin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xx;
  PetscScalar       *yy;
  PetscInt          *trstarts=yin->map->trstarts,i;
  PetscScalar       a=*alpha_p,b=*beta_p;

  VecGetArrayRead(xin,&xx);
  VecGetArray(yin,&yy);

  if (b == (PetscScalar)0.0) {
    for (i=trstarts[thread_id];i < trstarts[thread_id+1];i++) yy[i] = a*xx[i];
  } else {
    for (i=trstarts[thread_id];i < trstarts[thread_id+1];i++) yy[i] = a*xx[i] + b*yy[i];
  }
  VecRestoreArrayRead(xin,&xx);
  VecRestoreArray(yin,&yy);
  return 0;
}

PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
{
  if (alpha == (PetscScalar)0.0) {
    VecScale_Seq(yin,beta);
  } else if (beta == (PetscScalar)1.0) {
    VecAXPY_Seq(yin,alpha,xin);
  } else if (alpha == (PetscScalar)1.0) {
    VecAYPX_Seq(yin,beta,xin);
  } else {
    PetscScalar *scal1,*scal2;
    PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scal1,&scal2,NULL);
    *scal1 = alpha; *scal2 = beta;
    PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAXPBY_kernel,yin,scal1,scal2,xin);
    if (beta == (PetscScalar)0.0) {
      PetscLogFlops(yin->map->n);
    } else {
      PetscLogFlops(3.0*yin->map->n);
    }
  }
  return(0);
}
#else
PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
{
  PetscErrorCode    ierr;
  PetscInt          n = yin->map->n,i;
  const PetscScalar *xx;
  PetscScalar       *yy,a = alpha,b = beta;

  if (a == (PetscScalar)0.0) {
    VecScale_Seq(yin,beta);
  } else if (b == (PetscScalar)1.0) {
    VecAXPY_Seq(yin,alpha,xin);
  } else if (a == (PetscScalar)1.0) {
    VecAYPX_Seq(yin,beta,xin);
  } else if (b == (PetscScalar)0.0) {
    VecGetArrayRead(xin,&xx);
    VecGetArray(yin,(PetscScalar**)&yy);

    for (i=0; i<n; i++) yy[i] = a*xx[i];

    VecRestoreArrayRead(xin,&xx);
    VecRestoreArray(yin,(PetscScalar**)&yy);
    PetscLogFlops(xin->map->n);
  } else {
    VecGetArrayRead(xin,&xx);
    VecGetArray(yin,(PetscScalar**)&yy);

    for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];

    VecRestoreArrayRead(xin,&xx);
    VecRestoreArray(yin,(PetscScalar**)&yy);
    PetscLogFlops(3.0*xin->map->n);
  }
  return(0);
}
#endif
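
VecAXPBY_Seq computes yin <- alpha*xin + beta*yin, delegating the degenerate combinations to cheaper routines (alpha == 0 to VecScale_Seq, beta == 1 to VecAXPY_Seq, alpha == 1 to VecAYPX_Seq) and logging n or 3n flops depending on whether beta is zero. The sketch below (helper name and coefficients illustrative) shows the public entry point; note the argument order: alpha multiplies x, beta multiplies the vector being updated.

#include <petscvec.h>

/* Sketch: y <- 2.0*x + 0.5*y; for a VECSEQ vector this reaches VecAXPBY_Seq. */
static PetscErrorCode Blend(Vec y,Vec x)
{
  PetscErrorCode ierr;

  ierr = VecAXPBY(y,2.0,0.5,x);CHKERRQ(ierr);  /* alpha = 2.0 scales x, beta = 0.5 scales y */
  return 0;
}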
#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecAXPBYPCZ_kernel(PetscInt thread_id,Vec zin,PetscScalar *alpha_p,PetscScalar *beta_p,PetscScalar *gamma_p,Vec xin,Vec yin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xx,*yy;
  PetscScalar       *zz;
  PetscInt          *trstarts=zin->map->trstarts,i;
  PetscScalar       alpha=*alpha_p,beta=*beta_p,gamma=*gamma_p;

  VecGetArrayRead(xin,&xx);
  VecGetArrayRead(yin,&yy);
  VecGetArray(zin,&zz);

  if (alpha == (PetscScalar)1.0) {
    for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
  } else if (gamma == (PetscScalar)1.0) {
    for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
  } else if (gamma == (PetscScalar)0.0) {
    for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) zz[i] = alpha*xx[i] + beta*yy[i];
  } else {
    for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
  }
  VecRestoreArrayRead(xin,&xx);
  VecRestoreArrayRead(yin,&yy);
  VecRestoreArray(zin,&zz);
  return 0;
}

PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
{
  PetscScalar *scal1,*scal2,*scal3;

  PetscThreadCommGetScalars(PetscObjectComm((PetscObject)zin),&scal1,&scal2,&scal3);
  *scal1 = alpha; *scal2 = beta; *scal3 = gamma;
  PetscThreadCommRunKernel6(PetscObjectComm((PetscObject)zin),(PetscThreadKernel)VecAXPBYPCZ_kernel,zin,scal1,scal2,scal3,xin,yin);
  if (alpha == (PetscScalar)1.0) {
    PetscLogFlops(4.0*zin->map->n);
  } else if (gamma == (PetscScalar)1.0) {
    PetscLogFlops(4.0*zin->map->n);
  } else if (gamma == (PetscScalar)0.0) {
    PetscLogFlops(3.0*zin->map->n);
  } else {
    PetscLogFlops(5.0*zin->map->n);
  }
  return(0);
}
#else
PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
{
  PetscErrorCode    ierr;
  PetscInt          n = zin->map->n,i;
  const PetscScalar *yy,*xx;
  PetscScalar       *zz;

  VecGetArrayRead(xin,&xx);
  VecGetArrayRead(yin,&yy);
  VecGetArray(zin,&zz);
  if (alpha == (PetscScalar)1.0) {
    for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
    PetscLogFlops(4.0*n);
  } else if (gamma == (PetscScalar)1.0) {
    for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
    PetscLogFlops(4.0*n);
  } else if (gamma == (PetscScalar)0.0) {
    for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
    PetscLogFlops(3.0*n);
  } else {
    for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
    PetscLogFlops(5.0*n);
  }
  VecRestoreArrayRead(xin,&xx);
  VecRestoreArrayRead(yin,&yy);
  VecRestoreArray(zin,&zz);
  return(0);
}
#endif
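
VecAXPBYPCZ_Seq computes zin <- alpha*xin + beta*yin + gamma*zin, with special-cased loops (and flop counts of 4n, 4n, 3n, or 5n) for alpha == 1, gamma == 1, and gamma == 0. The closing sketch (helper name and coefficients illustrative) exercises it through the public interface.

#include <petscvec.h>

/* Sketch: z <- 1.0*x + 2.0*y + 3.0*z; for VECSEQ vectors this reaches
   VecAXPBYPCZ_Seq, and the alpha == 1 branch logs 4n flops. */
static PetscErrorCode Combine(Vec z,Vec x,Vec y)
{
  PetscErrorCode ierr;

  ierr = VecAXPBYPCZ(z,1.0,2.0,3.0,x,y);CHKERRQ(ierr);
  return 0;
}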