Actual source code: dvec2.c
petsc-3.7.1 2016-05-15
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc/private/kernels/petscaxpy.h>
11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
12: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
15: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
16: {
17: PetscErrorCode ierr;
18: PetscInt i,nv_rem,n = xin->map->n;
19: PetscScalar sum0,sum1,sum2,sum3;
20: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
21: Vec *yy;
24: sum0 = 0.0;
25: sum1 = 0.0;
26: sum2 = 0.0;
28: i = nv;
29: nv_rem = nv&0x3;
30: yy = (Vec*)yin;
31: VecGetArrayRead(xin,&x);
33: switch (nv_rem) {
34: case 3:
35: VecGetArrayRead(yy[0],&yy0);
36: VecGetArrayRead(yy[1],&yy1);
37: VecGetArrayRead(yy[2],&yy2);
38: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
39: VecRestoreArrayRead(yy[0],&yy0);
40: VecRestoreArrayRead(yy[1],&yy1);
41: VecRestoreArrayRead(yy[2],&yy2);
42: z[0] = sum0;
43: z[1] = sum1;
44: z[2] = sum2;
45: break;
46: case 2:
47: VecGetArrayRead(yy[0],&yy0);
48: VecGetArrayRead(yy[1],&yy1);
49: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
50: VecRestoreArrayRead(yy[0],&yy0);
51: VecRestoreArrayRead(yy[1],&yy1);
52: z[0] = sum0;
53: z[1] = sum1;
54: break;
55: case 1:
56: VecGetArrayRead(yy[0],&yy0);
57: fortranmdot1_(x,yy0,&n,&sum0);
58: VecRestoreArrayRead(yy[0],&yy0);
59: z[0] = sum0;
60: break;
61: case 0:
62: break;
63: }
64: z += nv_rem;
65: i -= nv_rem;
66: yy += nv_rem;
68: while (i >0) {
69: sum0 = 0.;
70: sum1 = 0.;
71: sum2 = 0.;
72: sum3 = 0.;
73: VecGetArrayRead(yy[0],&yy0);
74: VecGetArrayRead(yy[1],&yy1);
75: VecGetArrayRead(yy[2],&yy2);
76: VecGetArrayRead(yy[3],&yy3);
77: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
78: VecRestoreArrayRead(yy[0],&yy0);
79: VecRestoreArrayRead(yy[1],&yy1);
80: VecRestoreArrayRead(yy[2],&yy2);
81: VecRestoreArrayRead(yy[3],&yy3);
82: yy += 4;
83: z[0] = sum0;
84: z[1] = sum1;
85: z[2] = sum2;
86: z[3] = sum3;
87: z += 4;
88: i -= 4;
89: }
90: VecRestoreArrayRead(xin,&x);
91: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
92: return(0);
93: }
95: #else
98: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
99: {
100: PetscErrorCode ierr;
101: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
102: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
103: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
104: Vec *yy;
107: sum0 = 0.;
108: sum1 = 0.;
109: sum2 = 0.;
111: i = nv;
112: nv_rem = nv&0x3;
113: yy = (Vec*)yin;
114: j = n;
115: VecGetArrayRead(xin,&xbase);
116: x = xbase;
118: switch (nv_rem) {
119: case 3:
120: VecGetArrayRead(yy[0],&yy0);
121: VecGetArrayRead(yy[1],&yy1);
122: VecGetArrayRead(yy[2],&yy2);
123: switch (j_rem=j&0x3) {
124: case 3:
125: x2 = x[2];
126: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
127: sum2 += x2*PetscConj(yy2[2]);
128: case 2:
129: x1 = x[1];
130: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
131: sum2 += x1*PetscConj(yy2[1]);
132: case 1:
133: x0 = x[0];
134: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
135: sum2 += x0*PetscConj(yy2[0]);
136: case 0:
137: x += j_rem;
138: yy0 += j_rem;
139: yy1 += j_rem;
140: yy2 += j_rem;
141: j -= j_rem;
142: break;
143: }
144: while (j>0) {
145: x0 = x[0];
146: x1 = x[1];
147: x2 = x[2];
148: x3 = x[3];
149: x += 4;
151: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
152: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
153: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
154: j -= 4;
155: }
156: z[0] = sum0;
157: z[1] = sum1;
158: z[2] = sum2;
159: VecRestoreArrayRead(yy[0],&yy0);
160: VecRestoreArrayRead(yy[1],&yy1);
161: VecRestoreArrayRead(yy[2],&yy2);
162: break;
163: case 2:
164: VecGetArrayRead(yy[0],&yy0);
165: VecGetArrayRead(yy[1],&yy1);
166: switch (j_rem=j&0x3) {
167: case 3:
168: x2 = x[2];
169: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
170: case 2:
171: x1 = x[1];
172: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
173: case 1:
174: x0 = x[0];
175: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
176: case 0:
177: x += j_rem;
178: yy0 += j_rem;
179: yy1 += j_rem;
180: j -= j_rem;
181: break;
182: }
183: while (j>0) {
184: x0 = x[0];
185: x1 = x[1];
186: x2 = x[2];
187: x3 = x[3];
188: x += 4;
190: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
191: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
192: j -= 4;
193: }
194: z[0] = sum0;
195: z[1] = sum1;
197: VecRestoreArrayRead(yy[0],&yy0);
198: VecRestoreArrayRead(yy[1],&yy1);
199: break;
200: case 1:
201: VecGetArrayRead(yy[0],&yy0);
202: switch (j_rem=j&0x3) {
203: case 3:
204: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
205: case 2:
206: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
207: case 1:
208: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
209: case 0:
210: x += j_rem;
211: yy0 += j_rem;
212: j -= j_rem;
213: break;
214: }
215: while (j>0) {
216: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
217: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
218: yy0 +=4;
219: j -= 4; x+=4;
220: }
221: z[0] = sum0;
223: VecRestoreArrayRead(yy[0],&yy0);
224: break;
225: case 0:
226: break;
227: }
228: z += nv_rem;
229: i -= nv_rem;
230: yy += nv_rem;
232: while (i >0) {
233: sum0 = 0.;
234: sum1 = 0.;
235: sum2 = 0.;
236: sum3 = 0.;
237: VecGetArrayRead(yy[0],&yy0);
238: VecGetArrayRead(yy[1],&yy1);
239: VecGetArrayRead(yy[2],&yy2);
240: VecGetArrayRead(yy[3],&yy3);
242: j = n;
243: x = xbase;
244: switch (j_rem=j&0x3) {
245: case 3:
246: x2 = x[2];
247: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
248: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
249: case 2:
250: x1 = x[1];
251: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
252: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
253: case 1:
254: x0 = x[0];
255: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
256: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
257: case 0:
258: x += j_rem;
259: yy0 += j_rem;
260: yy1 += j_rem;
261: yy2 += j_rem;
262: yy3 += j_rem;
263: j -= j_rem;
264: break;
265: }
266: while (j>0) {
267: x0 = x[0];
268: x1 = x[1];
269: x2 = x[2];
270: x3 = x[3];
271: x += 4;
273: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
274: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
275: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
276: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
277: j -= 4;
278: }
279: z[0] = sum0;
280: z[1] = sum1;
281: z[2] = sum2;
282: z[3] = sum3;
283: z += 4;
284: i -= 4;
285: VecRestoreArrayRead(yy[0],&yy0);
286: VecRestoreArrayRead(yy[1],&yy1);
287: VecRestoreArrayRead(yy[2],&yy2);
288: VecRestoreArrayRead(yy[3],&yy3);
289: yy += 4;
290: }
291: VecRestoreArrayRead(xin,&xbase);
292: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
293: return(0);
294: }
295: #endif
297: /* ----------------------------------------------------------------------------*/
300: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
301: {
302: PetscErrorCode ierr;
303: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
304: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
305: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
306: Vec *yy;
309: sum0 = 0.;
310: sum1 = 0.;
311: sum2 = 0.;
313: i = nv;
314: nv_rem = nv&0x3;
315: yy = (Vec*)yin;
316: j = n;
317: VecGetArrayRead(xin,&xbase);
318: x = xbase;
320: switch (nv_rem) {
321: case 3:
322: VecGetArrayRead(yy[0],&yy0);
323: VecGetArrayRead(yy[1],&yy1);
324: VecGetArrayRead(yy[2],&yy2);
325: switch (j_rem=j&0x3) {
326: case 3:
327: x2 = x[2];
328: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
329: sum2 += x2*yy2[2];
330: case 2:
331: x1 = x[1];
332: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
333: sum2 += x1*yy2[1];
334: case 1:
335: x0 = x[0];
336: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
337: sum2 += x0*yy2[0];
338: case 0:
339: x += j_rem;
340: yy0 += j_rem;
341: yy1 += j_rem;
342: yy2 += j_rem;
343: j -= j_rem;
344: break;
345: }
346: while (j>0) {
347: x0 = x[0];
348: x1 = x[1];
349: x2 = x[2];
350: x3 = x[3];
351: x += 4;
353: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
354: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
355: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
356: j -= 4;
357: }
358: z[0] = sum0;
359: z[1] = sum1;
360: z[2] = sum2;
361: VecRestoreArrayRead(yy[0],&yy0);
362: VecRestoreArrayRead(yy[1],&yy1);
363: VecRestoreArrayRead(yy[2],&yy2);
364: break;
365: case 2:
366: VecGetArrayRead(yy[0],&yy0);
367: VecGetArrayRead(yy[1],&yy1);
368: switch (j_rem=j&0x3) {
369: case 3:
370: x2 = x[2];
371: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
372: case 2:
373: x1 = x[1];
374: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
375: case 1:
376: x0 = x[0];
377: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
378: case 0:
379: x += j_rem;
380: yy0 += j_rem;
381: yy1 += j_rem;
382: j -= j_rem;
383: break;
384: }
385: while (j>0) {
386: x0 = x[0];
387: x1 = x[1];
388: x2 = x[2];
389: x3 = x[3];
390: x += 4;
392: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
393: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
394: j -= 4;
395: }
396: z[0] = sum0;
397: z[1] = sum1;
399: VecRestoreArrayRead(yy[0],&yy0);
400: VecRestoreArrayRead(yy[1],&yy1);
401: break;
402: case 1:
403: VecGetArrayRead(yy[0],&yy0);
404: switch (j_rem=j&0x3) {
405: case 3:
406: x2 = x[2]; sum0 += x2*yy0[2];
407: case 2:
408: x1 = x[1]; sum0 += x1*yy0[1];
409: case 1:
410: x0 = x[0]; sum0 += x0*yy0[0];
411: case 0:
412: x += j_rem;
413: yy0 += j_rem;
414: j -= j_rem;
415: break;
416: }
417: while (j>0) {
418: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
419: j -= 4; x+=4;
420: }
421: z[0] = sum0;
423: VecRestoreArrayRead(yy[0],&yy0);
424: break;
425: case 0:
426: break;
427: }
428: z += nv_rem;
429: i -= nv_rem;
430: yy += nv_rem;
432: while (i >0) {
433: sum0 = 0.;
434: sum1 = 0.;
435: sum2 = 0.;
436: sum3 = 0.;
437: VecGetArrayRead(yy[0],&yy0);
438: VecGetArrayRead(yy[1],&yy1);
439: VecGetArrayRead(yy[2],&yy2);
440: VecGetArrayRead(yy[3],&yy3);
441: x = xbase;
443: j = n;
444: switch (j_rem=j&0x3) {
445: case 3:
446: x2 = x[2];
447: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
448: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
449: case 2:
450: x1 = x[1];
451: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
452: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
453: case 1:
454: x0 = x[0];
455: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
456: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
457: case 0:
458: x += j_rem;
459: yy0 += j_rem;
460: yy1 += j_rem;
461: yy2 += j_rem;
462: yy3 += j_rem;
463: j -= j_rem;
464: break;
465: }
466: while (j>0) {
467: x0 = x[0];
468: x1 = x[1];
469: x2 = x[2];
470: x3 = x[3];
471: x += 4;
473: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
474: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
475: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
476: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
477: j -= 4;
478: }
479: z[0] = sum0;
480: z[1] = sum1;
481: z[2] = sum2;
482: z[3] = sum3;
483: z += 4;
484: i -= 4;
485: VecRestoreArrayRead(yy[0],&yy0);
486: VecRestoreArrayRead(yy[1],&yy1);
487: VecRestoreArrayRead(yy[2],&yy2);
488: VecRestoreArrayRead(yy[3],&yy3);
489: yy += 4;
490: }
491: VecRestoreArrayRead(xin,&xbase);
492: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
493: return(0);
494: }
499: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
500: {
501: PetscInt i,j=0,n = xin->map->n;
502: PetscReal max,tmp;
503: const PetscScalar *xx;
504: PetscErrorCode ierr;
507: VecGetArrayRead(xin,&xx);
508: if (!n) {
509: max = PETSC_MIN_REAL;
510: j = -1;
511: } else {
512: max = PetscRealPart(*xx++); j = 0;
513: for (i=1; i<n; i++) {
514: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
515: }
516: }
517: *z = max;
518: if (idx) *idx = j;
519: VecRestoreArrayRead(xin,&xx);
520: return(0);
521: }
525: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
526: {
527: PetscInt i,j=0,n = xin->map->n;
528: PetscReal min,tmp;
529: const PetscScalar *xx;
530: PetscErrorCode ierr;
533: VecGetArrayRead(xin,&xx);
534: if (!n) {
535: min = PETSC_MAX_REAL;
536: j = -1;
537: } else {
538: min = PetscRealPart(*xx++); j = 0;
539: for (i=1; i<n; i++) {
540: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
541: }
542: }
543: *z = min;
544: if (idx) *idx = j;
545: VecRestoreArrayRead(xin,&xx);
546: return(0);
547: }
551: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
552: {
553: PetscInt i,n = xin->map->n;
554: PetscScalar *xx;
558: VecGetArray(xin,&xx);
559: if (alpha == (PetscScalar)0.0) {
560: PetscMemzero(xx,n*sizeof(PetscScalar));
561: } else {
562: for (i=0; i<n; i++) xx[i] = alpha;
563: }
564: VecRestoreArray(xin,&xx);
565: return(0);
566: }
570: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
571: {
572: PetscErrorCode ierr;
573: PetscInt n = xin->map->n,j,j_rem;
574: const PetscScalar *yy0,*yy1,*yy2,*yy3;
575: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
577: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
578: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
579: #endif
582: PetscLogFlops(nv*2.0*n);
583: VecGetArray(xin,&xx);
584: switch (j_rem=nv&0x3) {
585: case 3:
586: VecGetArrayRead(y[0],&yy0);
587: VecGetArrayRead(y[1],&yy1);
588: VecGetArrayRead(y[2],&yy2);
589: alpha0 = alpha[0];
590: alpha1 = alpha[1];
591: alpha2 = alpha[2];
592: alpha += 3;
593: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
594: VecRestoreArrayRead(y[0],&yy0);
595: VecRestoreArrayRead(y[1],&yy1);
596: VecRestoreArrayRead(y[2],&yy2);
597: y += 3;
598: break;
599: case 2:
600: VecGetArrayRead(y[0],&yy0);
601: VecGetArrayRead(y[1],&yy1);
602: alpha0 = alpha[0];
603: alpha1 = alpha[1];
604: alpha +=2;
605: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
606: VecRestoreArrayRead(y[0],&yy0);
607: VecRestoreArrayRead(y[1],&yy1);
608: y +=2;
609: break;
610: case 1:
611: VecGetArrayRead(y[0],&yy0);
612: alpha0 = *alpha++;
613: PetscKernelAXPY(xx,alpha0,yy0,n);
614: VecRestoreArrayRead(y[0],&yy0);
615: y +=1;
616: break;
617: }
618: for (j=j_rem; j<nv; j+=4) {
619: VecGetArrayRead(y[0],&yy0);
620: VecGetArrayRead(y[1],&yy1);
621: VecGetArrayRead(y[2],&yy2);
622: VecGetArrayRead(y[3],&yy3);
623: alpha0 = alpha[0];
624: alpha1 = alpha[1];
625: alpha2 = alpha[2];
626: alpha3 = alpha[3];
627: alpha += 4;
629: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
630: VecRestoreArrayRead(y[0],&yy0);
631: VecRestoreArrayRead(y[1],&yy1);
632: VecRestoreArrayRead(y[2],&yy2);
633: VecRestoreArrayRead(y[3],&yy3);
634: y += 4;
635: }
636: VecRestoreArray(xin,&xx);
637: return(0);
638: }
640: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
644: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
645: {
646: PetscErrorCode ierr;
647: PetscInt n = yin->map->n;
648: PetscScalar *yy;
649: const PetscScalar *xx;
652: if (alpha == (PetscScalar)0.0) {
653: VecCopy(xin,yin);
654: } else if (alpha == (PetscScalar)1.0) {
655: VecAXPY_Seq(yin,alpha,xin);
656: } else if (alpha == (PetscScalar)-1.0) {
657: PetscInt i;
658: VecGetArrayRead(xin,&xx);
659: VecGetArray(yin,&yy);
661: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
663: VecRestoreArrayRead(xin,&xx);
664: VecRestoreArray(yin,&yy);
665: PetscLogFlops(1.0*n);
666: } else {
667: VecGetArrayRead(xin,&xx);
668: VecGetArray(yin,&yy);
669: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
670: {
671: PetscScalar oalpha = alpha;
672: fortranaypx_(&n,&oalpha,xx,yy);
673: }
674: #else
675: {
676: PetscInt i;
678: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
679: }
680: #endif
681: VecRestoreArrayRead(xin,&xx);
682: VecRestoreArray(yin,&yy);
683: PetscLogFlops(2.0*n);
684: }
685: return(0);
686: }
688: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
689: /*
690: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
691: to be slower than a regular C loop. Hence,we do not include it.
692: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
693: */
697: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
698: {
699: PetscErrorCode ierr;
700: PetscInt i,n = win->map->n;
701: PetscScalar *ww;
702: const PetscScalar *yy,*xx;
705: VecGetArrayRead(xin,&xx);
706: VecGetArrayRead(yin,&yy);
707: VecGetArray(win,&ww);
708: if (alpha == (PetscScalar)1.0) {
709: PetscLogFlops(n);
710: /* could call BLAS axpy after call to memcopy, but may be slower */
711: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
712: } else if (alpha == (PetscScalar)-1.0) {
713: PetscLogFlops(n);
714: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
715: } else if (alpha == (PetscScalar)0.0) {
716: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
717: } else {
718: PetscScalar oalpha = alpha;
719: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
720: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
721: #else
722: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
723: #endif
724: PetscLogFlops(2.0*n);
725: }
726: VecRestoreArrayRead(xin,&xx);
727: VecRestoreArrayRead(yin,&yy);
728: VecRestoreArray(win,&ww);
729: return(0);
730: }
734: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
735: {
736: PetscErrorCode ierr;
737: PetscInt n = xin->map->n,i;
738: const PetscScalar *xx,*yy;
739: PetscReal m = 0.0;
742: VecGetArrayRead(xin,&xx);
743: VecGetArrayRead(yin,&yy);
744: for (i = 0; i < n; i++) {
745: if (yy[i] != (PetscScalar)0.0) {
746: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
747: } else {
748: m = PetscMax(PetscAbsScalar(xx[i]), m);
749: }
750: }
751: VecRestoreArrayRead(xin,&xx);
752: VecRestoreArrayRead(yin,&yy);
753: MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
754: PetscLogFlops(n);
755: return(0);
756: }
760: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
761: {
762: Vec_Seq *v = (Vec_Seq*)vin->data;
765: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
766: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
767: v->array = (PetscScalar*)a;
768: return(0);
769: }
773: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
774: {
775: Vec_Seq *v = (Vec_Seq*)vin->data;
779: PetscFree(v->array_allocated);
780: v->array_allocated = v->array = (PetscScalar*)a;
781: return(0);
782: }