Actual source code: bvec1.c
petsc-3.7.1 2016-05-15
/*
   Defines the BLAS-based vector operations. This code is shared by the
   parallel and sequential vector implementations.
*/
7: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
8: #include <petscblaslapack.h>
12: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
13: {
14: const PetscScalar *ya,*xa;
15: PetscBLASInt one = 1,bn;
16: PetscErrorCode ierr;
19: PetscBLASIntCast(xin->map->n,&bn);
20: VecGetArrayRead(xin,&xa);
21: VecGetArrayRead(yin,&ya);
22: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
23: PetscStackCallBLAS("BLASdot",*z = BLASdot_(&bn,ya,&one,xa,&one));
24: VecRestoreArrayRead(xin,&xa);
25: VecRestoreArrayRead(yin,&ya);
26: if (xin->map->n > 0) {
27: PetscLogFlops(2.0*xin->map->n-1);
28: }
29: return(0);
30: }
34: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
35: {
36: const PetscScalar *ya,*xa;
37: PetscBLASInt one = 1,bn;
38: PetscErrorCode ierr;
41: PetscBLASIntCast(xin->map->n,&bn);
42: VecGetArrayRead(xin,&xa);
43: VecGetArrayRead(yin,&ya);
44: PetscStackCallBLAS("BLASdot",*z = BLASdotu_(&bn,xa,&one,ya,&one));
45: VecRestoreArrayRead(xin,&xa);
46: VecRestoreArrayRead(yin,&ya);
47: if (xin->map->n > 0) {
48: PetscLogFlops(2.0*xin->map->n-1);
49: }
50: return(0);
51: }
55: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
56: {
58: PetscBLASInt one = 1,bn;
61: PetscBLASIntCast(xin->map->n,&bn);
62: if (alpha == (PetscScalar)0.0) {
63: VecSet_Seq(xin,alpha);
64: } else if (alpha != (PetscScalar)1.0) {
65: PetscScalar a = alpha,*xarray;
66: VecGetArray(xin,&xarray);
67: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
68: VecRestoreArray(xin,&xarray);
69: }
70: PetscLogFlops(xin->map->n);
71: return(0);
72: }
76: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
77: {
78: PetscErrorCode ierr;
79: const PetscScalar *xarray;
80: PetscScalar *yarray;
81: PetscBLASInt one = 1,bn;
84: PetscBLASIntCast(yin->map->n,&bn);
85: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
86: if (alpha != (PetscScalar)0.0) {
87: VecGetArrayRead(xin,&xarray);
88: VecGetArray(yin,&yarray);
89: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
90: VecRestoreArrayRead(xin,&xarray);
91: VecRestoreArray(yin,&yarray);
92: PetscLogFlops(2.0*yin->map->n);
93: }
94: return(0);
95: }
99: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
100: {
101: PetscErrorCode ierr;
102: PetscInt n = yin->map->n,i;
103: const PetscScalar *xx;
104: PetscScalar *yy,a = alpha,b = beta;
107: if (a == (PetscScalar)0.0) {
108: VecScale_Seq(yin,beta);
109: } else if (b == (PetscScalar)1.0) {
110: VecAXPY_Seq(yin,alpha,xin);
111: } else if (a == (PetscScalar)1.0) {
112: VecAYPX_Seq(yin,beta,xin);
113: } else if (b == (PetscScalar)0.0) {
114: VecGetArrayRead(xin,&xx);
115: VecGetArray(yin,(PetscScalar**)&yy);
117: for (i=0; i<n; i++) yy[i] = a*xx[i];
119: VecRestoreArrayRead(xin,&xx);
120: VecRestoreArray(yin,(PetscScalar**)&yy);
121: PetscLogFlops(xin->map->n);
122: } else {
123: VecGetArrayRead(xin,&xx);
124: VecGetArray(yin,(PetscScalar**)&yy);
126: for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];
128: VecRestoreArrayRead(xin,&xx);
129: VecRestoreArray(yin,(PetscScalar**)&yy);
130: PetscLogFlops(3.0*xin->map->n);
131: }
132: return(0);
133: }
137: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
138: {
139: PetscErrorCode ierr;
140: PetscInt n = zin->map->n,i;
141: const PetscScalar *yy,*xx;
142: PetscScalar *zz;
145: VecGetArrayRead(xin,&xx);
146: VecGetArrayRead(yin,&yy);
147: VecGetArray(zin,&zz);
148: if (alpha == (PetscScalar)1.0) {
149: for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
150: PetscLogFlops(4.0*n);
151: } else if (gamma == (PetscScalar)1.0) {
152: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
153: PetscLogFlops(4.0*n);
154: } else if (gamma == (PetscScalar)0.0) {
155: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
156: PetscLogFlops(3.0*n);
157: } else {
158: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
159: PetscLogFlops(5.0*n);
160: }
161: VecRestoreArrayRead(xin,&xx);
162: VecRestoreArrayRead(yin,&yy);
163: VecRestoreArray(zin,&zz);
164: return(0);
165: }