Actual source code: dvec2.c

petsc-3.7.1 2016-05-15
Report Typos and Errors
  2: /*
  3:    Defines some vector operation functions that are shared by
  4:   sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc/private/kernels/petscaxpy.h>



 11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 12: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 15: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 16: {
 17:   PetscErrorCode    ierr;
 18:   PetscInt          i,nv_rem,n = xin->map->n;
 19:   PetscScalar       sum0,sum1,sum2,sum3;
 20:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 21:   Vec               *yy;

 24:   sum0 = 0.0;
 25:   sum1 = 0.0;
 26:   sum2 = 0.0;

 28:   i      = nv;
 29:   nv_rem = nv&0x3;
 30:   yy     = (Vec*)yin;
 31:   VecGetArrayRead(xin,&x);

 33:   switch (nv_rem) {
 34:   case 3:
 35:     VecGetArrayRead(yy[0],&yy0);
 36:     VecGetArrayRead(yy[1],&yy1);
 37:     VecGetArrayRead(yy[2],&yy2);
 38:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 39:     VecRestoreArrayRead(yy[0],&yy0);
 40:     VecRestoreArrayRead(yy[1],&yy1);
 41:     VecRestoreArrayRead(yy[2],&yy2);
 42:     z[0] = sum0;
 43:     z[1] = sum1;
 44:     z[2] = sum2;
 45:     break;
 46:   case 2:
 47:     VecGetArrayRead(yy[0],&yy0);
 48:     VecGetArrayRead(yy[1],&yy1);
 49:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 50:     VecRestoreArrayRead(yy[0],&yy0);
 51:     VecRestoreArrayRead(yy[1],&yy1);
 52:     z[0] = sum0;
 53:     z[1] = sum1;
 54:     break;
 55:   case 1:
 56:     VecGetArrayRead(yy[0],&yy0);
 57:     fortranmdot1_(x,yy0,&n,&sum0);
 58:     VecRestoreArrayRead(yy[0],&yy0);
 59:     z[0] = sum0;
 60:     break;
 61:   case 0:
 62:     break;
 63:   }
 64:   z  += nv_rem;
 65:   i  -= nv_rem;
 66:   yy += nv_rem;

 68:   while (i >0) {
 69:     sum0 = 0.;
 70:     sum1 = 0.;
 71:     sum2 = 0.;
 72:     sum3 = 0.;
 73:     VecGetArrayRead(yy[0],&yy0);
 74:     VecGetArrayRead(yy[1],&yy1);
 75:     VecGetArrayRead(yy[2],&yy2);
 76:     VecGetArrayRead(yy[3],&yy3);
 77:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 78:     VecRestoreArrayRead(yy[0],&yy0);
 79:     VecRestoreArrayRead(yy[1],&yy1);
 80:     VecRestoreArrayRead(yy[2],&yy2);
 81:     VecRestoreArrayRead(yy[3],&yy3);
 82:     yy  += 4;
 83:     z[0] = sum0;
 84:     z[1] = sum1;
 85:     z[2] = sum2;
 86:     z[3] = sum3;
 87:     z   += 4;
 88:     i   -= 4;
 89:   }
 90:   VecRestoreArrayRead(xin,&x);
 91:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 92:   return(0);
 93: }

 95: #else
 98: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 99: {
100:   PetscErrorCode    ierr;
101:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
102:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
103:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
104:   Vec               *yy;

107:   sum0 = 0.;
108:   sum1 = 0.;
109:   sum2 = 0.;

111:   i      = nv;
112:   nv_rem = nv&0x3;
113:   yy     = (Vec*)yin;
114:   j      = n;
115:   VecGetArrayRead(xin,&xbase);
116:   x      = xbase;

118:   switch (nv_rem) {
119:   case 3:
120:     VecGetArrayRead(yy[0],&yy0);
121:     VecGetArrayRead(yy[1],&yy1);
122:     VecGetArrayRead(yy[2],&yy2);
123:     switch (j_rem=j&0x3) {
124:     case 3:
125:       x2    = x[2];
126:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
127:       sum2 += x2*PetscConj(yy2[2]);
128:     case 2:
129:       x1    = x[1];
130:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
131:       sum2 += x1*PetscConj(yy2[1]);
132:     case 1:
133:       x0    = x[0];
134:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
135:       sum2 += x0*PetscConj(yy2[0]);
136:     case 0:
137:       x   += j_rem;
138:       yy0 += j_rem;
139:       yy1 += j_rem;
140:       yy2 += j_rem;
141:       j   -= j_rem;
142:       break;
143:     }
144:     while (j>0) {
145:       x0 = x[0];
146:       x1 = x[1];
147:       x2 = x[2];
148:       x3 = x[3];
149:       x += 4;

151:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
152:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
153:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
154:       j    -= 4;
155:     }
156:     z[0] = sum0;
157:     z[1] = sum1;
158:     z[2] = sum2;
159:     VecRestoreArrayRead(yy[0],&yy0);
160:     VecRestoreArrayRead(yy[1],&yy1);
161:     VecRestoreArrayRead(yy[2],&yy2);
162:     break;
163:   case 2:
164:     VecGetArrayRead(yy[0],&yy0);
165:     VecGetArrayRead(yy[1],&yy1);
166:     switch (j_rem=j&0x3) {
167:     case 3:
168:       x2    = x[2];
169:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
170:     case 2:
171:       x1    = x[1];
172:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
173:     case 1:
174:       x0    = x[0];
175:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
176:     case 0:
177:       x   += j_rem;
178:       yy0 += j_rem;
179:       yy1 += j_rem;
180:       j   -= j_rem;
181:       break;
182:     }
183:     while (j>0) {
184:       x0 = x[0];
185:       x1 = x[1];
186:       x2 = x[2];
187:       x3 = x[3];
188:       x += 4;

190:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
191:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
192:       j    -= 4;
193:     }
194:     z[0] = sum0;
195:     z[1] = sum1;

197:     VecRestoreArrayRead(yy[0],&yy0);
198:     VecRestoreArrayRead(yy[1],&yy1);
199:     break;
200:   case 1:
201:     VecGetArrayRead(yy[0],&yy0);
202:     switch (j_rem=j&0x3) {
203:     case 3:
204:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
205:     case 2:
206:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
207:     case 1:
208:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
209:     case 0:
210:       x   += j_rem;
211:       yy0 += j_rem;
212:       j   -= j_rem;
213:       break;
214:     }
215:     while (j>0) {
216:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
217:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
218:       yy0  +=4;
219:       j    -= 4; x+=4;
220:     }
221:     z[0] = sum0;

223:     VecRestoreArrayRead(yy[0],&yy0);
224:     break;
225:   case 0:
226:     break;
227:   }
228:   z  += nv_rem;
229:   i  -= nv_rem;
230:   yy += nv_rem;

232:   while (i >0) {
233:     sum0 = 0.;
234:     sum1 = 0.;
235:     sum2 = 0.;
236:     sum3 = 0.;
237:     VecGetArrayRead(yy[0],&yy0);
238:     VecGetArrayRead(yy[1],&yy1);
239:     VecGetArrayRead(yy[2],&yy2);
240:     VecGetArrayRead(yy[3],&yy3);

242:     j = n;
243:     x = xbase;
244:     switch (j_rem=j&0x3) {
245:     case 3:
246:       x2    = x[2];
247:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
248:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
249:     case 2:
250:       x1    = x[1];
251:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
252:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
253:     case 1:
254:       x0    = x[0];
255:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
256:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
257:     case 0:
258:       x   += j_rem;
259:       yy0 += j_rem;
260:       yy1 += j_rem;
261:       yy2 += j_rem;
262:       yy3 += j_rem;
263:       j   -= j_rem;
264:       break;
265:     }
266:     while (j>0) {
267:       x0 = x[0];
268:       x1 = x[1];
269:       x2 = x[2];
270:       x3 = x[3];
271:       x += 4;

273:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
274:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
275:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
276:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
277:       j    -= 4;
278:     }
279:     z[0] = sum0;
280:     z[1] = sum1;
281:     z[2] = sum2;
282:     z[3] = sum3;
283:     z   += 4;
284:     i   -= 4;
285:     VecRestoreArrayRead(yy[0],&yy0);
286:     VecRestoreArrayRead(yy[1],&yy1);
287:     VecRestoreArrayRead(yy[2],&yy2);
288:     VecRestoreArrayRead(yy[3],&yy3);
289:     yy  += 4;
290:   }
291:   VecRestoreArrayRead(xin,&xbase);
292:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
293:   return(0);
294: }
295: #endif

297: /* ----------------------------------------------------------------------------*/
300: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
301: {
302:   PetscErrorCode    ierr;
303:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
304:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
305:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
306:   Vec               *yy;

309:   sum0 = 0.;
310:   sum1 = 0.;
311:   sum2 = 0.;

313:   i      = nv;
314:   nv_rem = nv&0x3;
315:   yy     = (Vec*)yin;
316:   j      = n;
317:   VecGetArrayRead(xin,&xbase);
318:   x      = xbase;

320:   switch (nv_rem) {
321:   case 3:
322:     VecGetArrayRead(yy[0],&yy0);
323:     VecGetArrayRead(yy[1],&yy1);
324:     VecGetArrayRead(yy[2],&yy2);
325:     switch (j_rem=j&0x3) {
326:     case 3:
327:       x2    = x[2];
328:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
329:       sum2 += x2*yy2[2];
330:     case 2:
331:       x1    = x[1];
332:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
333:       sum2 += x1*yy2[1];
334:     case 1:
335:       x0    = x[0];
336:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
337:       sum2 += x0*yy2[0];
338:     case 0:
339:       x   += j_rem;
340:       yy0 += j_rem;
341:       yy1 += j_rem;
342:       yy2 += j_rem;
343:       j   -= j_rem;
344:       break;
345:     }
346:     while (j>0) {
347:       x0 = x[0];
348:       x1 = x[1];
349:       x2 = x[2];
350:       x3 = x[3];
351:       x += 4;

353:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
354:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
355:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
356:       j    -= 4;
357:     }
358:     z[0] = sum0;
359:     z[1] = sum1;
360:     z[2] = sum2;
361:     VecRestoreArrayRead(yy[0],&yy0);
362:     VecRestoreArrayRead(yy[1],&yy1);
363:     VecRestoreArrayRead(yy[2],&yy2);
364:     break;
365:   case 2:
366:     VecGetArrayRead(yy[0],&yy0);
367:     VecGetArrayRead(yy[1],&yy1);
368:     switch (j_rem=j&0x3) {
369:     case 3:
370:       x2    = x[2];
371:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
372:     case 2:
373:       x1    = x[1];
374:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
375:     case 1:
376:       x0    = x[0];
377:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
378:     case 0:
379:       x   += j_rem;
380:       yy0 += j_rem;
381:       yy1 += j_rem;
382:       j   -= j_rem;
383:       break;
384:     }
385:     while (j>0) {
386:       x0 = x[0];
387:       x1 = x[1];
388:       x2 = x[2];
389:       x3 = x[3];
390:       x += 4;

392:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
393:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
394:       j    -= 4;
395:     }
396:     z[0] = sum0;
397:     z[1] = sum1;

399:     VecRestoreArrayRead(yy[0],&yy0);
400:     VecRestoreArrayRead(yy[1],&yy1);
401:     break;
402:   case 1:
403:     VecGetArrayRead(yy[0],&yy0);
404:     switch (j_rem=j&0x3) {
405:     case 3:
406:       x2 = x[2]; sum0 += x2*yy0[2];
407:     case 2:
408:       x1 = x[1]; sum0 += x1*yy0[1];
409:     case 1:
410:       x0 = x[0]; sum0 += x0*yy0[0];
411:     case 0:
412:       x   += j_rem;
413:       yy0 += j_rem;
414:       j   -= j_rem;
415:       break;
416:     }
417:     while (j>0) {
418:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
419:       j    -= 4; x+=4;
420:     }
421:     z[0] = sum0;

423:     VecRestoreArrayRead(yy[0],&yy0);
424:     break;
425:   case 0:
426:     break;
427:   }
428:   z  += nv_rem;
429:   i  -= nv_rem;
430:   yy += nv_rem;

432:   while (i >0) {
433:     sum0 = 0.;
434:     sum1 = 0.;
435:     sum2 = 0.;
436:     sum3 = 0.;
437:     VecGetArrayRead(yy[0],&yy0);
438:     VecGetArrayRead(yy[1],&yy1);
439:     VecGetArrayRead(yy[2],&yy2);
440:     VecGetArrayRead(yy[3],&yy3);
441:     x    = xbase;

443:     j = n;
444:     switch (j_rem=j&0x3) {
445:     case 3:
446:       x2    = x[2];
447:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
448:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
449:     case 2:
450:       x1    = x[1];
451:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
452:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
453:     case 1:
454:       x0    = x[0];
455:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
456:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
457:     case 0:
458:       x   += j_rem;
459:       yy0 += j_rem;
460:       yy1 += j_rem;
461:       yy2 += j_rem;
462:       yy3 += j_rem;
463:       j   -= j_rem;
464:       break;
465:     }
466:     while (j>0) {
467:       x0 = x[0];
468:       x1 = x[1];
469:       x2 = x[2];
470:       x3 = x[3];
471:       x += 4;

473:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
474:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
475:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
476:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
477:       j    -= 4;
478:     }
479:     z[0] = sum0;
480:     z[1] = sum1;
481:     z[2] = sum2;
482:     z[3] = sum3;
483:     z   += 4;
484:     i   -= 4;
485:     VecRestoreArrayRead(yy[0],&yy0);
486:     VecRestoreArrayRead(yy[1],&yy1);
487:     VecRestoreArrayRead(yy[2],&yy2);
488:     VecRestoreArrayRead(yy[3],&yy3);
489:     yy  += 4;
490:   }
491:   VecRestoreArrayRead(xin,&xbase);
492:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
493:   return(0);
494: }


499: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
500: {
501:   PetscInt          i,j=0,n = xin->map->n;
502:   PetscReal         max,tmp;
503:   const PetscScalar *xx;
504:   PetscErrorCode    ierr;

507:   VecGetArrayRead(xin,&xx);
508:   if (!n) {
509:     max = PETSC_MIN_REAL;
510:     j   = -1;
511:   } else {
512:     max = PetscRealPart(*xx++); j = 0;
513:     for (i=1; i<n; i++) {
514:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
515:     }
516:   }
517:   *z = max;
518:   if (idx) *idx = j;
519:   VecRestoreArrayRead(xin,&xx);
520:   return(0);
521: }

525: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
526: {
527:   PetscInt          i,j=0,n = xin->map->n;
528:   PetscReal         min,tmp;
529:   const PetscScalar *xx;
530:   PetscErrorCode    ierr;

533:   VecGetArrayRead(xin,&xx);
534:   if (!n) {
535:     min = PETSC_MAX_REAL;
536:     j   = -1;
537:   } else {
538:     min = PetscRealPart(*xx++); j = 0;
539:     for (i=1; i<n; i++) {
540:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
541:     }
542:   }
543:   *z = min;
544:   if (idx) *idx = j;
545:   VecRestoreArrayRead(xin,&xx);
546:   return(0);
547: }

551: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
552: {
553:   PetscInt       i,n = xin->map->n;
554:   PetscScalar    *xx;

558:   VecGetArray(xin,&xx);
559:   if (alpha == (PetscScalar)0.0) {
560:     PetscMemzero(xx,n*sizeof(PetscScalar));
561:   } else {
562:     for (i=0; i<n; i++) xx[i] = alpha;
563:   }
564:   VecRestoreArray(xin,&xx);
565:   return(0);
566: }

570: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
571: {
572:   PetscErrorCode    ierr;
573:   PetscInt          n = xin->map->n,j,j_rem;
574:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
575:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

577: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
578: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
579: #endif

582:   PetscLogFlops(nv*2.0*n);
583:   VecGetArray(xin,&xx);
584:   switch (j_rem=nv&0x3) {
585:   case 3:
586:     VecGetArrayRead(y[0],&yy0);
587:     VecGetArrayRead(y[1],&yy1);
588:     VecGetArrayRead(y[2],&yy2);
589:     alpha0 = alpha[0];
590:     alpha1 = alpha[1];
591:     alpha2 = alpha[2];
592:     alpha += 3;
593:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
594:     VecRestoreArrayRead(y[0],&yy0);
595:     VecRestoreArrayRead(y[1],&yy1);
596:     VecRestoreArrayRead(y[2],&yy2);
597:     y   += 3;
598:     break;
599:   case 2:
600:     VecGetArrayRead(y[0],&yy0);
601:     VecGetArrayRead(y[1],&yy1);
602:     alpha0 = alpha[0];
603:     alpha1 = alpha[1];
604:     alpha +=2;
605:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
606:     VecRestoreArrayRead(y[0],&yy0);
607:     VecRestoreArrayRead(y[1],&yy1);
608:     y   +=2;
609:     break;
610:   case 1:
611:     VecGetArrayRead(y[0],&yy0);
612:     alpha0 = *alpha++;
613:     PetscKernelAXPY(xx,alpha0,yy0,n);
614:     VecRestoreArrayRead(y[0],&yy0);
615:     y   +=1;
616:     break;
617:   }
618:   for (j=j_rem; j<nv; j+=4) {
619:     VecGetArrayRead(y[0],&yy0);
620:     VecGetArrayRead(y[1],&yy1);
621:     VecGetArrayRead(y[2],&yy2);
622:     VecGetArrayRead(y[3],&yy3);
623:     alpha0 = alpha[0];
624:     alpha1 = alpha[1];
625:     alpha2 = alpha[2];
626:     alpha3 = alpha[3];
627:     alpha += 4;

629:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
630:     VecRestoreArrayRead(y[0],&yy0);
631:     VecRestoreArrayRead(y[1],&yy1);
632:     VecRestoreArrayRead(y[2],&yy2);
633:     VecRestoreArrayRead(y[3],&yy3);
634:     y   += 4;
635:   }
636:   VecRestoreArray(xin,&xx);
637:   return(0);
638: }

640: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

644: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
645: {
646:   PetscErrorCode    ierr;
647:   PetscInt          n = yin->map->n;
648:   PetscScalar       *yy;
649:   const PetscScalar *xx;

652:   if (alpha == (PetscScalar)0.0) {
653:     VecCopy(xin,yin);
654:   } else if (alpha == (PetscScalar)1.0) {
655:     VecAXPY_Seq(yin,alpha,xin);
656:   } else if (alpha == (PetscScalar)-1.0) {
657:     PetscInt i;
658:     VecGetArrayRead(xin,&xx);
659:     VecGetArray(yin,&yy);

661:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

663:     VecRestoreArrayRead(xin,&xx);
664:     VecRestoreArray(yin,&yy);
665:     PetscLogFlops(1.0*n);
666:   } else {
667:     VecGetArrayRead(xin,&xx);
668:     VecGetArray(yin,&yy);
669: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
670:     {
671:       PetscScalar oalpha = alpha;
672:       fortranaypx_(&n,&oalpha,xx,yy);
673:     }
674: #else
675:     {
676:       PetscInt i;

678:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
679:     }
680: #endif
681:     VecRestoreArrayRead(xin,&xx);
682:     VecRestoreArray(yin,&yy);
683:     PetscLogFlops(2.0*n);
684:   }
685:   return(0);
686: }

688: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
689: /*
690:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
691:   to be slower than a regular C loop.  Hence,we do not include it.
692:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
693: */

697: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
698: {
699:   PetscErrorCode     ierr;
700:   PetscInt           i,n = win->map->n;
701:   PetscScalar        *ww;
702:   const PetscScalar  *yy,*xx;

705:   VecGetArrayRead(xin,&xx);
706:   VecGetArrayRead(yin,&yy);
707:   VecGetArray(win,&ww);
708:   if (alpha == (PetscScalar)1.0) {
709:     PetscLogFlops(n);
710:     /* could call BLAS axpy after call to memcopy, but may be slower */
711:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
712:   } else if (alpha == (PetscScalar)-1.0) {
713:     PetscLogFlops(n);
714:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
715:   } else if (alpha == (PetscScalar)0.0) {
716:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
717:   } else {
718:     PetscScalar oalpha = alpha;
719: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
720:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
721: #else
722:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
723: #endif
724:     PetscLogFlops(2.0*n);
725:   }
726:   VecRestoreArrayRead(xin,&xx);
727:   VecRestoreArrayRead(yin,&yy);
728:   VecRestoreArray(win,&ww);
729:   return(0);
730: }

734: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
735: {
736:   PetscErrorCode    ierr;
737:   PetscInt          n = xin->map->n,i;
738:   const PetscScalar *xx,*yy;
739:   PetscReal         m = 0.0;

742:   VecGetArrayRead(xin,&xx);
743:   VecGetArrayRead(yin,&yy);
744:   for (i = 0; i < n; i++) {
745:     if (yy[i] != (PetscScalar)0.0) {
746:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
747:     } else {
748:       m = PetscMax(PetscAbsScalar(xx[i]), m);
749:     }
750:   }
751:   VecRestoreArrayRead(xin,&xx);
752:   VecRestoreArrayRead(yin,&yy);
753:   MPIU_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
754:   PetscLogFlops(n);
755:   return(0);
756: }

760: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
761: {
762:   Vec_Seq *v = (Vec_Seq*)vin->data;

765:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
766:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
767:   v->array         = (PetscScalar*)a;
768:   return(0);
769: }

773: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
774: {
775:   Vec_Seq        *v = (Vec_Seq*)vin->data;

779:   PetscFree(v->array_allocated);
780:   v->array_allocated = v->array = (PetscScalar*)a;
781:   return(0);
782: }