Actual source code: bvec1.c
petsc-3.12.2 2019-11-22
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
11: {
12: const PetscScalar *ya,*xa;
13: PetscBLASInt one = 1,bn;
14: PetscErrorCode ierr;
17: PetscBLASIntCast(xin->map->n,&bn);
18: VecGetArrayRead(xin,&xa);
19: VecGetArrayRead(yin,&ya);
20: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
21: PetscStackCallBLAS("BLASdot",*z = BLASdot_(&bn,ya,&one,xa,&one));
22: VecRestoreArrayRead(xin,&xa);
23: VecRestoreArrayRead(yin,&ya);
24: if (xin->map->n > 0) {
25: PetscLogFlops(2.0*xin->map->n-1);
26: }
27: return(0);
28: }
30: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
31: {
32: const PetscScalar *ya,*xa;
33: PetscBLASInt one = 1,bn;
34: PetscErrorCode ierr;
37: PetscBLASIntCast(xin->map->n,&bn);
38: VecGetArrayRead(xin,&xa);
39: VecGetArrayRead(yin,&ya);
40: PetscStackCallBLAS("BLASdot",*z = BLASdotu_(&bn,xa,&one,ya,&one));
41: VecRestoreArrayRead(xin,&xa);
42: VecRestoreArrayRead(yin,&ya);
43: if (xin->map->n > 0) {
44: PetscLogFlops(2.0*xin->map->n-1);
45: }
46: return(0);
47: }
49: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
50: {
52: PetscBLASInt one = 1,bn;
55: PetscBLASIntCast(xin->map->n,&bn);
56: if (alpha == (PetscScalar)0.0) {
57: VecSet_Seq(xin,alpha);
58: } else if (alpha != (PetscScalar)1.0) {
59: PetscScalar a = alpha,*xarray;
60: VecGetArray(xin,&xarray);
61: PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a,xarray,&one));
62: VecRestoreArray(xin,&xarray);
63: }
64: PetscLogFlops(xin->map->n);
65: return(0);
66: }
68: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
69: {
70: PetscErrorCode ierr;
71: const PetscScalar *xarray;
72: PetscScalar *yarray;
73: PetscBLASInt one = 1,bn;
76: PetscBLASIntCast(yin->map->n,&bn);
77: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
78: if (alpha != (PetscScalar)0.0) {
79: VecGetArrayRead(xin,&xarray);
80: VecGetArray(yin,&yarray);
81: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one));
82: VecRestoreArrayRead(xin,&xarray);
83: VecRestoreArray(yin,&yarray);
84: PetscLogFlops(2.0*yin->map->n);
85: }
86: return(0);
87: }
89: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar a,PetscScalar b,Vec xin)
90: {
91: PetscErrorCode ierr;
92: PetscInt n = yin->map->n,i;
93: const PetscScalar *xx;
94: PetscScalar *yy;
97: if (a == (PetscScalar)0.0) {
98: VecScale_Seq(yin,b);
99: } else if (b == (PetscScalar)1.0) {
100: VecAXPY_Seq(yin,a,xin);
101: } else if (a == (PetscScalar)1.0) {
102: VecAYPX_Seq(yin,b,xin);
103: } else if (b == (PetscScalar)0.0) {
104: VecGetArrayRead(xin,&xx);
105: VecGetArray(yin,(PetscScalar**)&yy);
106: for (i=0; i<n; i++) yy[i] = a*xx[i];
107: VecRestoreArrayRead(xin,&xx);
108: VecRestoreArray(yin,(PetscScalar**)&yy);
109: PetscLogFlops(xin->map->n);
110: } else {
111: VecGetArrayRead(xin,&xx);
112: VecGetArray(yin,(PetscScalar**)&yy);
113: for (i=0; i<n; i++) yy[i] = a*xx[i] + b*yy[i];
114: VecRestoreArrayRead(xin,&xx);
115: VecRestoreArray(yin,(PetscScalar**)&yy);
116: PetscLogFlops(3.0*xin->map->n);
117: }
118: return(0);
119: }
121: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
122: {
123: PetscErrorCode ierr;
124: PetscInt n = zin->map->n,i;
125: const PetscScalar *yy,*xx;
126: PetscScalar *zz;
129: VecGetArrayRead(xin,&xx);
130: VecGetArrayRead(yin,&yy);
131: VecGetArray(zin,&zz);
132: if (alpha == (PetscScalar)1.0) {
133: for (i=0; i<n; i++) zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
134: PetscLogFlops(4.0*n);
135: } else if (gamma == (PetscScalar)1.0) {
136: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
137: PetscLogFlops(4.0*n);
138: } else if (gamma == (PetscScalar)0.0) {
139: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i];
140: PetscLogFlops(3.0*n);
141: } else {
142: for (i=0; i<n; i++) zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
143: PetscLogFlops(5.0*n);
144: }
145: VecRestoreArrayRead(xin,&xx);
146: VecRestoreArrayRead(yin,&yy);
147: VecRestoreArray(zin,&zz);
148: return(0);
149: }