Actual source code: pipecg2.c
petsc-3.14.2 2020-12-03
1: #include <petsc/private/kspimpl.h>
3: /* The auxiliary functions below are merged vector operations that load vectors from memory once and use
4: the data multiple times by performing vector operations element-wise. These functions
5: only apply to sequential vectors.
6: */
7: /* VecMergedDot_Private function merges the dot products for gamma, delta and dp */
8: static PetscErrorCode VecMergedDot_Private(Vec U,Vec W,Vec R,PetscInt normtype,PetscScalar *ru, PetscScalar *wu, PetscScalar *uu)
9: {
10: const PetscScalar *PETSC_RESTRICT PU, *PETSC_RESTRICT PW, *PETSC_RESTRICT PR;
11: PetscScalar sumru = 0.0, sumwu = 0.0, sumuu = 0.0;
12: PetscInt j, n;
13: PetscErrorCode ierr;
16: VecGetArrayRead(U,(const PetscScalar**)&PU);
17: VecGetArrayRead(W,(const PetscScalar**)&PW);
18: VecGetArrayRead(R,(const PetscScalar**)&PR);
19: VecGetLocalSize(U,&n);
21: if (normtype==KSP_NORM_PRECONDITIONED) {
22: PetscPragmaSIMD
23: for (j=0; j<n; j++) {
24: sumwu += PW[j] * PetscConj(PU[j]);
25: sumru += PR[j] * PetscConj(PU[j]);
26: sumuu += PU[j] * PetscConj(PU[j]);
27: }
28: } else if (normtype==KSP_NORM_UNPRECONDITIONED) {
29: PetscPragmaSIMD
30: for (j=0; j<n; j++) {
31: sumwu += PW[j] * PetscConj(PU[j]);
32: sumru += PR[j] * PetscConj(PU[j]);
33: sumuu += PR[j] * PetscConj(PR[j]);
34: }
35: } else if (normtype==KSP_NORM_NATURAL) {
36: PetscPragmaSIMD
37: for (j=0; j<n; j++) {
38: sumwu += PW[j] * PetscConj(PU[j]);
39: sumru += PR[j] * PetscConj(PU[j]);
40: }
41: sumuu = sumru;
42: }
44: *ru = sumru;
45: *wu = sumwu;
46: *uu = sumuu;
48: VecRestoreArrayRead(U,(const PetscScalar**)&PU);
49: VecRestoreArrayRead(W,(const PetscScalar**)&PW);
50: VecRestoreArrayRead(R,(const PetscScalar**)&PR);
51: return(0);
52: }
54: /* VecMergedDot2_Private function merges the dot products for lambda_1 and lambda_4 */
55: static PetscErrorCode VecMergedDot2_Private(Vec N,Vec M,Vec W,PetscScalar *wm,PetscScalar *nm)
56: {
57: const PetscScalar *PETSC_RESTRICT PN, *PETSC_RESTRICT PM, *PETSC_RESTRICT PW;
58: PetscScalar sumwm = 0.0, sumnm = 0.0;
59: PetscInt j, n;
60: PetscErrorCode ierr;
63: VecGetArrayRead(W,(const PetscScalar**)&PW);
64: VecGetArrayRead(N,(const PetscScalar**)&PN);
65: VecGetArrayRead(M,(const PetscScalar**)&PM);
66: VecGetLocalSize(N,&n);
68: PetscPragmaSIMD
69: for (j=0; j<n; j++){
70: sumwm += PW[j] * PetscConj(PM[j]);
71: sumnm += PN[j] * PetscConj(PM[j]);
72: }
74: *wm = sumwm;
75: *nm = sumnm;
77: VecRestoreArrayRead(W,(const PetscScalar**)&PW);
78: VecRestoreArrayRead(N,(const PetscScalar**)&PN);
79: VecRestoreArrayRead(M,(const PetscScalar**)&PM);
80: return(0);
81: }
83: /* VecMergedOpsShort_Private function merges the dot products, AXPY and SAXPY operations for all vectors for iteration 0 */
84: static PetscErrorCode VecMergedOpsShort_Private(Vec vx,Vec vr,Vec vz,Vec vw,Vec vp,Vec vq,Vec vc,Vec vd,Vec vg0,Vec vh0,Vec vg1,Vec vh1,Vec vs,Vec va1,Vec vb1,Vec ve,Vec vf,Vec vm,Vec vn,Vec vu,PetscInt normtype,PetscScalar beta0,PetscScalar alpha0,PetscScalar beta1,PetscScalar alpha1,PetscScalar *lambda)
85: {
86: PetscScalar *PETSC_RESTRICT px, *PETSC_RESTRICT pr, *PETSC_RESTRICT pz, *PETSC_RESTRICT pw;
87: PetscScalar *PETSC_RESTRICT pp, *PETSC_RESTRICT pq;
88: PetscScalar *PETSC_RESTRICT pc, *PETSC_RESTRICT pd, *PETSC_RESTRICT pg0, *PETSC_RESTRICT ph0, *PETSC_RESTRICT pg1,*PETSC_RESTRICT ph1,*PETSC_RESTRICT ps,*PETSC_RESTRICT pa1,*PETSC_RESTRICT pb1, *PETSC_RESTRICT pe,*PETSC_RESTRICT pf,*PETSC_RESTRICT pm,*PETSC_RESTRICT pn, *PETSC_RESTRICT pu;
89: PetscInt j, n;
90: PetscErrorCode ierr;
93: VecGetArray(vx,(PetscScalar**)&px);
94: VecGetArray(vr,(PetscScalar**)&pr);
95: VecGetArray(vz,(PetscScalar**)&pz);
96: VecGetArray(vw,(PetscScalar**)&pw);
97: VecGetArray(vp,(PetscScalar**)&pp);
98: VecGetArray(vq,(PetscScalar**)&pq);
99: VecGetArray(vc,(PetscScalar**)&pc);
100: VecGetArray(vd,(PetscScalar**)&pd);
101: VecGetArray(vg0,(PetscScalar**)&pg0);
102: VecGetArray(vh0,(PetscScalar**)&ph0);
103: VecGetArray(vg1,(PetscScalar**)&pg1);
104: VecGetArray(vh1,(PetscScalar**)&ph1);
105: VecGetArray(vs,(PetscScalar**)&ps);
106: VecGetArray(va1,(PetscScalar**)&pa1);
107: VecGetArray(vb1,(PetscScalar**)&pb1);
108: VecGetArray(ve,(PetscScalar**)&pe);
109: VecGetArray(vf,(PetscScalar**)&pf);
110: VecGetArray(vm,(PetscScalar**)&pm);
111: VecGetArray(vn,(PetscScalar**)&pn);
112: VecGetArray(vu,(PetscScalar**)&pu);
114: VecGetLocalSize(vx,&n);
115: for (j=0; j<15; j++) lambda[j] = 0.0;
117: if (normtype==KSP_NORM_PRECONDITIONED) {
118: PetscPragmaSIMD
119: for (j=0; j<n; j++) {
120: pz[j] = pn[j];
121: pq[j] = pm[j];
122: ps[j] = pw[j];
123: pp[j] = pu[j];
124: pc[j] = pg0[j];
125: pd[j] = ph0[j];
126: pa1[j] = pe[j];
127: pb1[j] = pf[j];
129: px[j] = px[j] + alpha0 * pp[j];
130: pr[j] = pr[j] - alpha0 * ps[j];
131: pu[j] = pu[j] - alpha0 * pq[j];
132: pw[j] = pw[j] - alpha0 * pz[j];
133: pm[j] = pm[j] - alpha0 * pc[j];
134: pn[j] = pn[j] - alpha0 * pd[j];
135: pg0[j] = pg0[j] - alpha0 * pa1[j];
136: ph0[j] = ph0[j] - alpha0 * pb1[j];
138: pg1[j] = pg0[j];
139: ph1[j] = ph0[j];
141: pz[j] = pn[j] + beta1 * pz[j];
142: pq[j] = pm[j] + beta1 * pq[j];
143: ps[j] = pw[j] + beta1 * ps[j];
144: pp[j] = pu[j] + beta1 * pp[j];
145: pc[j] = pg0[j] + beta1 * pc[j];
146: pd[j] = ph0[j] + beta1 * pd[j];
148: px[j] = px[j] + alpha1 * pp[j];
149: pr[j] = pr[j] - alpha1 * ps[j];
150: pu[j] = pu[j] - alpha1 * pq[j];
151: pw[j] = pw[j] - alpha1 * pz[j];
152: pm[j] = pm[j] - alpha1 * pc[j];
153: pn[j] = pn[j] - alpha1 * pd[j];
155: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
156: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
157: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
158: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
159: lambda[11] += pw[j] * PetscConj(pu[j]); lambda[12] += pu[j] * PetscConj(pu[j]);
160: }
161: lambda[3] = PetscConj(lambda[2]);
162: lambda[5] = PetscConj(lambda[1]);
163: lambda[8] = PetscConj(lambda[7]);
164: lambda[13] = PetscConj(lambda[11]);
165: lambda[14] = PetscConj(lambda[0]);
167: } else if (normtype==KSP_NORM_UNPRECONDITIONED) {
168: PetscPragmaSIMD
169: for (j=0; j<n; j++) {
170: pz[j] = pn[j];
171: pq[j] = pm[j];
172: ps[j] = pw[j];
173: pp[j] = pu[j];
174: pc[j] = pg0[j];
175: pd[j] = ph0[j];
176: pa1[j] = pe[j];
177: pb1[j] = pf[j];
179: px[j] = px[j] + alpha0 * pp[j];
180: pr[j] = pr[j] - alpha0 * ps[j];
181: pu[j] = pu[j] - alpha0 * pq[j];
182: pw[j] = pw[j] - alpha0 * pz[j];
183: pm[j] = pm[j] - alpha0 * pc[j];
184: pn[j] = pn[j] - alpha0 * pd[j];
185: pg0[j] = pg0[j] - alpha0 * pa1[j];
186: ph0[j] = ph0[j] - alpha0 * pb1[j];
188: pg1[j] = pg0[j];
189: ph1[j] = ph0[j];
191: pz[j] = pn[j] + beta1 * pz[j];
192: pq[j] = pm[j] + beta1 * pq[j];
193: ps[j] = pw[j] + beta1 * ps[j];
194: pp[j] = pu[j] + beta1 * pp[j];
195: pc[j] = pg0[j] + beta1 * pc[j];
196: pd[j] = ph0[j] + beta1 * pd[j];
198: px[j] = px[j] + alpha1 * pp[j];
199: pr[j] = pr[j] - alpha1 * ps[j];
200: pu[j] = pu[j] - alpha1 * pq[j];
201: pw[j] = pw[j] - alpha1 * pz[j];
202: pm[j] = pm[j] - alpha1 * pc[j];
203: pn[j] = pn[j] - alpha1 * pd[j];
205: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
206: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
207: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
208: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
209: lambda[11] += pw[j] * PetscConj(pu[j]); lambda[12] += pr[j] * PetscConj(pr[j]);
210: }
211: lambda[3] = PetscConj(lambda[2]);
212: lambda[5] = PetscConj(lambda[1]);
213: lambda[8] = PetscConj(lambda[7]);
214: lambda[13] = PetscConj(lambda[11]);
215: lambda[14] = PetscConj(lambda[0]);
217: } else if (normtype==KSP_NORM_NATURAL) {
218: PetscPragmaSIMD
219: for (j=0; j<n; j++) {
220: pz[j] = pn[j];
221: pq[j] = pm[j];
222: ps[j] = pw[j];
223: pp[j] = pu[j];
224: pc[j] = pg0[j];
225: pd[j] = ph0[j];
226: pa1[j] = pe[j];
227: pb1[j] = pf[j];
229: px[j] = px[j] + alpha0 * pp[j];
230: pr[j] = pr[j] - alpha0 * ps[j];
231: pu[j] = pu[j] - alpha0 * pq[j];
232: pw[j] = pw[j] - alpha0 * pz[j];
233: pm[j] = pm[j] - alpha0 * pc[j];
234: pn[j] = pn[j] - alpha0 * pd[j];
235: pg0[j] = pg0[j] - alpha0 * pa1[j];
236: ph0[j] = ph0[j] - alpha0 * pb1[j];
238: pg1[j] = pg0[j];
239: ph1[j] = ph0[j];
241: pz[j] = pn[j] + beta1 * pz[j];
242: pq[j] = pm[j] + beta1 * pq[j];
243: ps[j] = pw[j] + beta1 * ps[j];
244: pp[j] = pu[j] + beta1 * pp[j];
245: pc[j] = pg0[j] + beta1 * pc[j];
246: pd[j] = ph0[j] + beta1 * pd[j];
248: px[j] = px[j] + alpha1 * pp[j];
249: pr[j] = pr[j] - alpha1 * ps[j];
250: pu[j] = pu[j] - alpha1 * pq[j];
251: pw[j] = pw[j] - alpha1 * pz[j];
252: pm[j] = pm[j] - alpha1 * pc[j];
253: pn[j] = pn[j] - alpha1 * pd[j];
255: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
256: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
257: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
258: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
259: lambda[11] += pw[j] * PetscConj(pu[j]);
260: }
261: lambda[3] = PetscConj(lambda[2]);
262: lambda[5] = PetscConj(lambda[1]);
263: lambda[8] = PetscConj(lambda[7]);
264: lambda[13] = PetscConj(lambda[11]);
265: lambda[14] = PetscConj(lambda[0]);
266: lambda[12] = lambda[10];
268: }
270: VecRestoreArray(vx,(PetscScalar**)&px);
271: VecRestoreArray(vr,(PetscScalar**)&pr);
272: VecRestoreArray(vz,(PetscScalar**)&pz);
273: VecRestoreArray(vw,(PetscScalar**)&pw);
274: VecRestoreArray(vp,(PetscScalar**)&pp);
275: VecRestoreArray(vq,(PetscScalar**)&pq);
276: VecRestoreArray(vc,(PetscScalar**)&pc);
277: VecRestoreArray(vd,(PetscScalar**)&pd);
278: VecRestoreArray(vg0,(PetscScalar**)&pg0);
279: VecRestoreArray(vh0,(PetscScalar**)&ph0);
280: VecRestoreArray(vg1,(PetscScalar**)&pg1);
281: VecRestoreArray(vh1,(PetscScalar**)&ph1);
282: VecRestoreArray(vs,(PetscScalar**)&ps);
283: VecRestoreArray(va1,(PetscScalar**)&pa1);
284: VecRestoreArray(vb1,(PetscScalar**)&pb1);
285: VecRestoreArray(ve,(PetscScalar**)&pe);
286: VecRestoreArray(vf,(PetscScalar**)&pf);
287: VecRestoreArray(vm,(PetscScalar**)&pm);
288: VecRestoreArray(vn,(PetscScalar**)&pn);
289: VecRestoreArray(vu,(PetscScalar**)&pu);
290: return(0);
291: }
293: /* VecMergedOps_Private function merges the dot products, AXPY and SAXPY operations for all vectors for iteration > 0 */
294: static PetscErrorCode VecMergedOps_Private(Vec vx,Vec vr,Vec vz,Vec vw,Vec vp,Vec vq,Vec vc,Vec vd,Vec vg0,Vec vh0,Vec vg1,Vec vh1,Vec vs,Vec va1,Vec vb1,Vec ve,Vec vf,Vec vm,Vec vn,Vec vu,PetscInt normtype,PetscScalar beta0,PetscScalar alpha0,PetscScalar beta1,PetscScalar alpha1,PetscScalar *lambda, PetscScalar alphaold)
295: {
296: PetscScalar *PETSC_RESTRICT px, *PETSC_RESTRICT pr, *PETSC_RESTRICT pz, *PETSC_RESTRICT pw;
297: PetscScalar *PETSC_RESTRICT pp, *PETSC_RESTRICT pq;
298: PetscScalar *PETSC_RESTRICT pc, *PETSC_RESTRICT pd, *PETSC_RESTRICT pg0, *PETSC_RESTRICT ph0, *PETSC_RESTRICT pg1, *PETSC_RESTRICT ph1,*PETSC_RESTRICT ps, *PETSC_RESTRICT pa1,*PETSC_RESTRICT pb1,*PETSC_RESTRICT pe,*PETSC_RESTRICT pf, *PETSC_RESTRICT pm,*PETSC_RESTRICT pn, *PETSC_RESTRICT pu;
299: PetscInt j, n;
300: PetscErrorCode ierr;
303: VecGetArray(vx,(PetscScalar**)&px);
304: VecGetArray(vr,(PetscScalar**)&pr);
305: VecGetArray(vz,(PetscScalar**)&pz);
306: VecGetArray(vw,(PetscScalar**)&pw);
307: VecGetArray(vp,(PetscScalar**)&pp);
308: VecGetArray(vq,(PetscScalar**)&pq);
309: VecGetArray(vc,(PetscScalar**)&pc);
310: VecGetArray(vd,(PetscScalar**)&pd);
311: VecGetArray(vg0,(PetscScalar**)&pg0);
312: VecGetArray(vh0,(PetscScalar**)&ph0);
313: VecGetArray(vg1,(PetscScalar**)&pg1);
314: VecGetArray(vh1,(PetscScalar**)&ph1);
315: VecGetArray(vs,(PetscScalar**)&ps);
316: VecGetArray(va1,(PetscScalar**)&pa1);
317: VecGetArray(vb1,(PetscScalar**)&pb1);
318: VecGetArray(ve,(PetscScalar**)&pe);
319: VecGetArray(vf,(PetscScalar**)&pf);
320: VecGetArray(vm,(PetscScalar**)&pm);
321: VecGetArray(vn,(PetscScalar**)&pn);
322: VecGetArray(vu,(PetscScalar**)&pu);
324: VecGetLocalSize(vx,&n);
325: for (j=0; j<15; j++) lambda[j] = 0.0;
327: if (normtype==KSP_NORM_PRECONDITIONED) {
328: PetscPragmaSIMD
329: for (j=0; j<n; j++) {
330: pa1[j] = (pg1[j] - pg0[j])/alphaold;
331: pb1[j] = (ph1[j] - ph0[j])/alphaold;
333: pz[j] = pn[j] + beta0 * pz[j];
334: pq[j] = pm[j] + beta0 * pq[j];
335: ps[j] = pw[j] + beta0 * ps[j];
336: pp[j] = pu[j] + beta0 * pp[j];
337: pc[j] = pg0[j] + beta0 * pc[j];
338: pd[j] = ph0[j] + beta0 * pd[j];
339: pa1[j] = pe[j] + beta0 * pa1[j];
340: pb1[j] = pf[j] + beta0 * pb1[j];
342: px[j] = px[j] + alpha0 * pp[j];
343: pr[j] = pr[j] - alpha0 * ps[j];
344: pu[j] = pu[j] - alpha0 * pq[j];
345: pw[j] = pw[j] - alpha0 * pz[j];
346: pm[j] = pm[j] - alpha0 * pc[j];
347: pn[j] = pn[j] - alpha0 * pd[j];
348: pg0[j] = pg0[j] - alpha0 * pa1[j];
349: ph0[j] = ph0[j] - alpha0 * pb1[j];
351: pg1[j] = pg0[j];
352: ph1[j] = ph0[j];
354: pz[j] = pn[j] + beta1 * pz[j];
355: pq[j] = pm[j] + beta1 * pq[j];
356: ps[j] = pw[j] + beta1 * ps[j];
357: pp[j] = pu[j] + beta1 * pp[j];
358: pc[j] = pg0[j] + beta1 * pc[j];
359: pd[j] = ph0[j] + beta1 * pd[j];
361: px[j] = px[j] + alpha1 * pp[j];
362: pr[j] = pr[j] - alpha1 * ps[j];
363: pu[j] = pu[j] - alpha1 * pq[j];
364: pw[j] = pw[j] - alpha1 * pz[j];
365: pm[j] = pm[j] - alpha1 * pc[j];
366: pn[j] = pn[j] - alpha1 * pd[j];
368: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
369: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
370: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
371: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
372: lambda[11] += pw[j] * PetscConj(pu[j]); lambda[12] += pu[j] * PetscConj(pu[j]);
373: }
374: lambda[3] = PetscConj(lambda[2]);
375: lambda[5] = PetscConj(lambda[1]);
376: lambda[8] = PetscConj(lambda[7]);
377: lambda[13] = PetscConj(lambda[11]);
378: lambda[14] = PetscConj(lambda[0]);
379: } else if (normtype==KSP_NORM_UNPRECONDITIONED) {
380: PetscPragmaSIMD
381: for (j=0; j<n; j++) {
382: pa1[j] = (pg1[j] - pg0[j])/alphaold;
383: pb1[j] = (ph1[j] - ph0[j])/alphaold;
385: pz[j] = pn[j] + beta0 * pz[j];
386: pq[j] = pm[j] + beta0 * pq[j];
387: ps[j] = pw[j] + beta0 * ps[j];
388: pp[j] = pu[j] + beta0 * pp[j];
389: pc[j] = pg0[j] + beta0 * pc[j];
390: pd[j] = ph0[j] + beta0 * pd[j];
391: pa1[j] = pe[j] + beta0 * pa1[j];
392: pb1[j] = pf[j] + beta0 * pb1[j];
394: px[j] = px[j] + alpha0 * pp[j];
395: pr[j] = pr[j] - alpha0 * ps[j];
396: pu[j] = pu[j] - alpha0 * pq[j];
397: pw[j] = pw[j] - alpha0 * pz[j];
398: pm[j] = pm[j] - alpha0 * pc[j];
399: pn[j] = pn[j] - alpha0 * pd[j];
400: pg0[j] = pg0[j] - alpha0 * pa1[j];
401: ph0[j] = ph0[j] - alpha0 * pb1[j];
403: pg1[j] = pg0[j];
404: ph1[j] = ph0[j];
406: pz[j] = pn[j] + beta1 * pz[j];
407: pq[j] = pm[j] + beta1 * pq[j];
408: ps[j] = pw[j] + beta1 * ps[j];
409: pp[j] = pu[j] + beta1 * pp[j];
410: pc[j] = pg0[j] + beta1 * pc[j];
411: pd[j] = ph0[j] + beta1 * pd[j];
413: px[j] = px[j] + alpha1 * pp[j];
414: pr[j] = pr[j] - alpha1 * ps[j];
415: pu[j] = pu[j] - alpha1 * pq[j];
416: pw[j] = pw[j] - alpha1 * pz[j];
417: pm[j] = pm[j] - alpha1 * pc[j];
418: pn[j] = pn[j] - alpha1 * pd[j];
420: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
421: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
422: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
423: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
424: lambda[11] += pw[j] * PetscConj(pu[j]); lambda[12] += pr[j] * PetscConj(pr[j]);
425: }
426: lambda[3] = PetscConj(lambda[2]);
427: lambda[5] = PetscConj(lambda[1]);
428: lambda[8] = PetscConj(lambda[7]);
429: lambda[13] = PetscConj(lambda[11]);
430: lambda[14] = PetscConj(lambda[0]);
431: } else if (normtype==KSP_NORM_NATURAL) {
432: PetscPragmaSIMD
433: for (j=0; j<n; j++) {
434: pa1[j] = (pg1[j] - pg0[j])/alphaold;
435: pb1[j] = (ph1[j] - ph0[j])/alphaold;
437: pz[j] = pn[j] + beta0 * pz[j];
438: pq[j] = pm[j] + beta0 * pq[j];
439: ps[j] = pw[j] + beta0 * ps[j];
440: pp[j] = pu[j] + beta0 * pp[j];
441: pc[j] = pg0[j] + beta0 * pc[j];
442: pd[j] = ph0[j] + beta0 * pd[j];
443: pa1[j] = pe[j] + beta0 * pa1[j];
444: pb1[j] = pf[j] + beta0 * pb1[j];
446: px[j] = px[j] + alpha0 * pp[j];
447: pr[j] = pr[j] - alpha0 * ps[j];
448: pu[j] = pu[j] - alpha0 * pq[j];
449: pw[j] = pw[j] - alpha0 * pz[j];
450: pm[j] = pm[j] - alpha0 * pc[j];
451: pn[j] = pn[j] - alpha0 * pd[j];
452: pg0[j] = pg0[j] - alpha0 * pa1[j];
453: ph0[j] = ph0[j] - alpha0 * pb1[j];
455: pg1[j] = pg0[j];
456: ph1[j] = ph0[j];
458: pz[j] = pn[j] + beta1 * pz[j];
459: pq[j] = pm[j] + beta1 * pq[j];
460: ps[j] = pw[j] + beta1 * ps[j];
461: pp[j] = pu[j] + beta1 * pp[j];
462: pc[j] = pg0[j] + beta1 * pc[j];
463: pd[j] = ph0[j] + beta1 * pd[j];
465: px[j] = px[j] + alpha1 * pp[j];
466: pr[j] = pr[j] - alpha1 * ps[j];
467: pu[j] = pu[j] - alpha1 * pq[j];
468: pw[j] = pw[j] - alpha1 * pz[j];
469: pm[j] = pm[j] - alpha1 * pc[j];
470: pn[j] = pn[j] - alpha1 * pd[j];
472: lambda[0] += ps[j] * PetscConj(pu[j]); lambda[1] += pw[j] * PetscConj(pm[j]);
473: lambda[2] += pw[j] * PetscConj(pq[j]); lambda[4] += ps[j] * PetscConj(pq[j]);
474: lambda[6] += pn[j] * PetscConj(pm[j]); lambda[7] += pn[j] * PetscConj(pq[j]);
475: lambda[9] += pz[j] * PetscConj(pq[j]); lambda[10] += pr[j] * PetscConj(pu[j]);
476: lambda[11] += pw[j] * PetscConj(pu[j]);
477: }
478: lambda[3] = PetscConj(lambda[2]);
479: lambda[5] = PetscConj(lambda[1]);
480: lambda[8] = PetscConj(lambda[7]);
481: lambda[13] = PetscConj(lambda[11]);
482: lambda[14] = PetscConj(lambda[0]);
483: lambda[12] = lambda[10];
484: }
486: VecRestoreArray(vx,(PetscScalar**)&px);
487: VecRestoreArray(vr,(PetscScalar**)&pr);
488: VecRestoreArray(vz,(PetscScalar**)&pz);
489: VecRestoreArray(vw,(PetscScalar**)&pw);
490: VecRestoreArray(vp,(PetscScalar**)&pp);
491: VecRestoreArray(vq,(PetscScalar**)&pq);
492: VecRestoreArray(vc,(PetscScalar**)&pc);
493: VecRestoreArray(vd,(PetscScalar**)&pd);
494: VecRestoreArray(vg0,(PetscScalar**)&pg0);
495: VecRestoreArray(vh0,(PetscScalar**)&ph0);
496: VecRestoreArray(vg1,(PetscScalar**)&pg1);
497: VecRestoreArray(vh1,(PetscScalar**)&ph1);
498: VecRestoreArray(vs,(PetscScalar**)&ps);
499: VecRestoreArray(va1,(PetscScalar**)&pa1);
500: VecRestoreArray(vb1,(PetscScalar**)&pb1);
501: VecRestoreArray(ve,(PetscScalar**)&pe);
502: VecRestoreArray(vf,(PetscScalar**)&pf);
503: VecRestoreArray(vm,(PetscScalar**)&pm);
504: VecRestoreArray(vn,(PetscScalar**)&pn);
505: VecRestoreArray(vu,(PetscScalar**)&pu);
506: return(0);
507: }
509: /*
510: KSPSetUp_PIPECG2 - Sets up the workspace needed by the PIPECG method.
512: This is called once, usually automatically by KSPSolve() or KSPSetUp()
513: but can be called directly by KSPSetUp()
514: */
515: static PetscErrorCode KSPSetUp_PIPECG2(KSP ksp)
516: {
520: /* get work vectors needed by PIPECG2 */
521: KSPSetWorkVecs(ksp,20);
522: return(0);
523: }
525: /*
526: KSPSolve_PIPECG2 - This routine actually applies the PIPECG2 method
527: */
528: static PetscErrorCode KSPSolve_PIPECG2(KSP ksp)
529: {
531: PetscInt i,n;
532: PetscScalar alpha[2],beta[2],gamma[2],delta[2],lambda[15];
533: PetscScalar dps = 0.0,alphaold=0.0;
534: PetscReal dp = 0.0;
535: Vec X,B,Z,P,W,Q,U,M,N,R,S,C,D,E,F,G[2],H[2],A1,B1;
536: Mat Amat,Pmat;
537: PetscBool diagonalscale;
538: MPI_Comm pcomm;
539: MPI_Request req;
540: MPI_Status stat;
543: pcomm = PetscObjectComm((PetscObject)ksp);
544: PCGetDiagonalScale(ksp->pc,&diagonalscale);
545: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
547: X = ksp->vec_sol;
548: B = ksp->vec_rhs;
549: M = ksp->work[0];
550: Z = ksp->work[1];
551: P = ksp->work[2];
552: N = ksp->work[3];
553: W = ksp->work[4];
554: Q = ksp->work[5];
555: U = ksp->work[6];
556: R = ksp->work[7];
557: S = ksp->work[8];
558: C = ksp->work[9];
559: D = ksp->work[10];
560: E = ksp->work[11];
561: F = ksp->work[12];
562: G[0] = ksp->work[13];
563: H[0] = ksp->work[14];
564: G[1] = ksp->work[15];
565: H[1] = ksp->work[16];
566: A1 = ksp->work[17];
567: B1 = ksp->work[18];
569: PetscMemzero(alpha,2*sizeof(PetscScalar));
570: PetscMemzero(beta,2*sizeof(PetscScalar));
571: PetscMemzero(gamma,2*sizeof(PetscScalar));
572: PetscMemzero(delta,2*sizeof(PetscScalar));
573: PetscMemzero(lambda,15*sizeof(PetscScalar));
575: VecGetLocalSize(B,&n);
576: PCGetOperators(ksp->pc,&Amat,&Pmat);
578: ksp->its = 0;
579: if (!ksp->guess_zero) {
580: KSP_MatMult(ksp,Amat,X,R); /* r <- b - Ax */
581: VecAYPX(R,-1.0,B);
582: } else {
583: VecCopy(B,R); /* r <- b (x is 0) */
584: }
586: KSP_PCApply(ksp,R,U); /* u <- Br */
587: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
589: VecMergedDot_Private(U,W,R,ksp->normtype,&gamma[0],&delta[0],&dps); /* gamma <- r'*u , delta <- w'*u , dp <- u'*u or r'*r or r'*u depending on ksp_norm_type */
590: lambda[10]= gamma[0];
591: lambda[11]= delta[0];
592: lambda[12] = dps;
594: MPI_Iallreduce(MPI_IN_PLACE,&lambda[10],3,MPIU_SCALAR,MPIU_SUM,pcomm,&req);
596: KSP_PCApply(ksp,W,M); /* m <- Bw */
597: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
599: KSP_PCApply(ksp,N,G[0]); /* g <- Bn */
600: KSP_MatMult(ksp,Amat,G[0],H[0]); /* h <- Ag */
602: KSP_PCApply(ksp,H[0],E); /* e <- Bh */
603: KSP_MatMult(ksp,Amat,E,F); /* f <- Ae */
605: MPI_Wait(&req,&stat);
607: gamma[0] = lambda[10];
608: delta[0] = lambda[11];
609: dp = PetscSqrtReal(PetscAbsScalar(lambda[12]));
611: VecMergedDot2_Private(N,M,W,&lambda[1],&lambda[6]); /* lambda_1 <- w'*m , lambda_4 <- n'*m */
612: MPIU_Allreduce(MPI_IN_PLACE,&lambda[1],1,MPIU_SCALAR,MPIU_SUM,pcomm);
613: MPIU_Allreduce(MPI_IN_PLACE,&lambda[6],1,MPIU_SCALAR,MPIU_SUM,pcomm);
615: lambda[5] = PetscConj(lambda[1]);
616: lambda[13] = PetscConj(lambda[11]);
618: KSPLogResidualHistory(ksp,dp);
619: KSPMonitor(ksp,0,dp);
620: ksp->rnorm = dp;
622: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
623: if (ksp->reason) return(0);
625: for (i=2; i<ksp->max_it; i+=2) {
626: if (i == 2) {
627: beta[0] = 0;
628: alpha[0] = gamma[0]/delta[0];
630: gamma[1] = gamma[0] - alpha[0] * lambda[13] - alpha[0] * delta[0] + alpha[0] * alpha[0] * lambda[1];
631: delta[1] = delta[0] - alpha[0] * lambda[1] - alpha[0] * lambda[5] + alpha[0]*alpha[0]*lambda[6];
633: beta[1] = gamma[1] / gamma[0];
634: alpha[1] = gamma[1] / (delta[1] - beta[1] / alpha[0] * gamma[1]);
636: VecMergedOpsShort_Private(X,R,Z,W,P,Q,C,D,G[0],H[0],G[1],H[1],S,A1,B1,E,F,M,N,U,ksp->normtype,beta[0],alpha[0],beta[1],alpha[1],lambda);
637: } else {
638: beta[0] = gamma[1] / gamma[0];
639: alpha[0] = gamma[1] / (delta[1] - beta[0] / alpha[1] * gamma[1]);
641: gamma[0] = gamma[1];
642: delta[0] = delta[1];
644: gamma[1] = gamma[0] - alpha[0]*(lambda[13] + beta[0]*lambda[14]) - alpha[0] *(delta[0] + beta[0] * lambda[0]) + alpha[0] * alpha[0] * (lambda[1] + beta[0] * lambda[2] + beta[0] * lambda[3] + beta[0] * beta[0] * lambda[4]);
646: delta[1] = delta[0] - alpha[0] * (lambda[1] + beta[0]* lambda[2]) - alpha[0] *(lambda[5] + beta[0] * lambda[3]) + alpha[0]*alpha[0] * (lambda[6] + beta[0] * lambda[7] + beta[0] *lambda[8] + beta[0] * beta[0] * lambda[9]);
648: beta[1] = gamma[1] / gamma[0];
649: alpha[1] = gamma[1] / (delta[1] - beta[1] / alpha[0] * gamma[1]);
651: VecMergedOps_Private(X,R,Z,W,P,Q,C,D,G[0],H[0],G[1],H[1],S,A1,B1,E,F,M,N,U,ksp->normtype,beta[0],alpha[0],beta[1],alpha[1],lambda,alphaold);
652: }
654: gamma[0] = gamma[1];
655: delta[0] = delta[1];
657: MPI_Iallreduce(MPI_IN_PLACE,lambda,15,MPIU_SCALAR,MPIU_SUM,pcomm,&req); /* Calculating the lambdas, gamma, delta and dp */
659: KSP_PCApply(ksp,N,G[0]); /* g <- Bn */
660: KSP_MatMult(ksp,Amat,G[0],H[0]); /* h <- Ag */
662: KSP_PCApply(ksp,H[0],E); /* e <- Bh */
663: KSP_MatMult(ksp,Amat,E,F); /* f <- Ae */
665: MPI_Wait(&req,&stat);
667: gamma[1] = lambda[10];
668: delta[1] = lambda[11];
669: dp = PetscSqrtReal(PetscAbsScalar(lambda[12]));
671: alphaold = alpha[1];
672: ksp->its = i;
674: if (i > 0) {
675: if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
676: ksp->rnorm = dp;
677: KSPLogResidualHistory(ksp,dp);
678: KSPMonitor(ksp,i,dp);
679: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
680: if (ksp->reason) break;
681: }
682: }
684: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
685: return(0);
686: }
688: /*MC
689: KSPPIPECG2 - Pipelined conjugate gradient method with a single non-blocking allreduce per two iterations.
691: This method has only a single non-blocking reduction per two iterations, compared to 2 blocking for standard CG. The
692: non-blocking reduction is overlapped by two matrix-vector products and two preconditioner applications.
694: Level: intermediate
696: Notes:
697: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
698: See the FAQ on the PETSc website for details.
700: Contributed by:
701: Manasi Tiwari, Computational and Data Sciences, Indian Institute of Science, Bangalore
703: Reference:
704: Manasi Tiwari and Sathish Vadhiyar, "Pipelined Conjugate Gradient Methods for Distributed Memory Systems",
705: Submitted to International Conference on High Performance Computing, Data and Analytics 2020.
707: .seealso: KSPCreate(), KSPSetType(), KSPCG, KSPPIPECG
708: M*/
709: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECG2(KSP ksp)
710: {
714: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
715: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
716: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
717: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
719: ksp->ops->setup = KSPSetUp_PIPECG2;
720: ksp->ops->solve = KSPSolve_PIPECG2;
721: ksp->ops->destroy = KSPDestroyDefault;
722: ksp->ops->view = NULL;
723: ksp->ops->setfromoptions = NULL;
724: ksp->ops->buildsolution = KSPBuildSolutionDefault;
725: ksp->ops->buildresidual = KSPBuildResidualDefault;
726: return(0);
727: }