Actual source code: agmresdeflation.c

petsc-3.11.0 2019-03-29
Report Typos and Errors
  1: /*
  2:  *  This file computes data for the deflated restarting in the Newton-basis GMRES. At each restart (or at each detected stagnation in the adaptive strategy), a basis of an (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace. It is then used to augment the Newton basis.
  3:  *
  4:  * References : D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflation of eigenvalues. Research report INRIA RR-7787.
  5:  * Author: Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr>, 2011
  6:  */

  8:  #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>

 10: /* Quicksort algorithm to sort  the eigenvalues in increasing orders
 11:  * val_r - real part of eigenvalues, unchanged on exit.
 12:  * val_i - Imaginary part of eigenvalues unchanged on exit.
 13:  * size - Number of eigenvalues (with complex conjugates)
 14:  * perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
 15:  */
 16: #define  DEPTH  500
 17: static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
 18: {

 20:   PetscInt    deb[DEPTH], fin[DEPTH];
 21:   PetscInt    ipivot;
 22:   PetscScalar pivot_r, pivot_i;
 23:   PetscInt    i, L, R, j;
 24:   PetscScalar abs_pivot;
 25:   PetscScalar abs_val;

 28:   /* initialize perm vector */
 29:   for (j = 0; j < size; j++) perm[j] = j;

 31:   deb[0] = 0;
 32:   fin[0] = size;
 33:   i      = 0;
 34:   while (i >= 0) {
 35:     L = deb[i];
 36:     R = fin[i] - 1;
 37:     if (L < R) {
 38:       pivot_r   = val_r[L];
 39:       pivot_i   = val_i[L];
 40:       abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
 41:       ipivot    = perm[L];
 42:       if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
 43:       while (L < R) {
 44:         abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
 45:         while (abs_val >= abs_pivot && L < R) {
 46:           R--;
 47:           abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
 48:         }
 49:         if (L < R) {
 50:           val_r[L] = val_r[R];
 51:           val_i[L] = val_i[R];
 52:           perm[L]  = perm[R];
 53:           L       += 1;
 54:         }
 55:         abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
 56:         while (abs_val <= abs_pivot && L < R) {
 57:           L++;
 58:           abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
 59:         }
 60:         if (L < R) {
 61:           val_r[R] = val_r[L];
 62:           val_i[R] = val_i[L];
 63:           perm[R]  = perm[L];
 64:           R       -= 1;
 65:         }
 66:       }
 67:       val_r[L] = pivot_r;
 68:       val_i[L] = pivot_i;
 69:       perm[L]  = ipivot;
 70:       deb[i+1] = L + 1;
 71:       fin[i+1] = fin[i];
 72:       fin[i]   = L;
 73:       i       += 1;
 74:       if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
 75:     } else i--;
 76:   }
 77:   return(0);
 78: }

 80: /*
 81:  * Compute the Schur vectors from the generalized eigenvalue problem A.u =\lamba.B.u
 82:  * KspSize -  rank of the matrices A and B, size of the current Krylov basis
 83:  * A - Left matrix
 84:  * B - Right matrix
 85:  * ldA - first dimension of A as declared  in the calling program
 86:  * ldB - first dimension of B as declared  in the calling program
 87:  * IsReduced - specifies if the matrices are already in the reduced form,
 88:  * i.e A is a Hessenberg matrix and B is upper triangular.
 89:  * Sr - on exit, the extracted Schur vectors corresponding
 90:  * the smallest eigenvalues (with complex conjugates)
 91:  * CurNeig - Number of extracted eigenvalues
 92:  */
 93: static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
 94: {
 95:   KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
 96:   PetscInt       max_k   = agmres->max_k;
 97:   PetscBLASInt   r;
 98:   PetscInt       neig    = agmres->neig;
 99:   PetscScalar    *wr     = agmres->wr;
100:   PetscScalar    *wi     = agmres->wi;
101:   PetscScalar    *beta   = agmres->beta;
102:   PetscScalar    *Q      = agmres->Q;
103:   PetscScalar    *Z      = agmres->Z;
104:   PetscScalar    *work   = agmres->work;
105:   PetscBLASInt   *select = agmres->select;
106:   PetscInt       *perm   = agmres->perm;
107:   PetscBLASInt   sdim    = 0;
108:   PetscInt       i,j;
109:   PetscBLASInt   info;
111:   PetscBLASInt   *iwork = agmres->iwork;
112:   PetscBLASInt   N = MAXKSPSIZE;
113:   PetscBLASInt   lwork,liwork;
114:   PetscBLASInt   ilo,ihi;
115:   PetscBLASInt   ijob,wantQ,wantZ;
116:   PetscScalar    Dif[2];

119:   ijob  = 2;
120:   wantQ = 1;
121:   wantZ = 1;
122:   PetscBLASIntCast(PetscMax(8*N+16,4*neig*(N-neig)),&lwork);
123:   PetscBLASIntCast(2*N*neig,&liwork);
124:   ilo   = 1;
125:   PetscBLASIntCast(KspSize,&ihi);

127:   /* Compute the Schur form */
128:   if (IsReduced) {                /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
129: #if defined(PETSC_MISSING_LAPACK_HGEQZ)
130:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HGEQZ - Lapack routine is unavailable.");
131: #else
132:     PetscStackCallBLAS("LAPACKhgeqz",LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &ihi, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
133:     if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xhgeqz_");
134: #endif
135:   } else {
136: #if defined(PETSC_MISSING_LAPACK_GGES)
137:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
138: #else
139:     PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
140:     if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xgges_");
141: #endif
142:   }

144:   /* We should avoid computing these ratio...  */
145:   for (i = 0; i < KspSize; i++) {
146:     if (beta[i] != 0.0) {
147:       wr[i] /= beta[i];
148:       wi[i] /= beta[i];
149:     }
150:   }

152:   /* Sort the eigenvalues to extract the smallest ones */
153:   KSPAGMRESQuickSort(wr, wi, KspSize, perm);

155:   /* Count the number of extracted eigenvalues (with complex conjugates) */
156:   r = 0;
157:   while (r < neig) {
158:     if (wi[r] != 0) r += 2;
159:     else r += 1;
160:   }
161:   /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
162:   PetscMemzero(select, N*sizeof(PetscBLASInt));
163:   if (!agmres->GreatestEig) {
164:     for (j = 0; j < r; j++) select[perm[j]] = 1;
165:   } else {
166:     for (j = 0; j < r; j++) select[perm[KspSize-j-1]] = 1;
167:   }
168: #if defined(PETSC_MISSING_LAPACK_TGSEN)
169:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
170: #else
171:   PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
172:   if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
173: #endif
174:   /* Extract the Schur vectors associated to the r smallest eigenvalues */
175:   PetscMemzero(Sr,(N+1)*r*sizeof(PetscScalar));
176:   for (j = 0; j < r; j++) {
177:     for (i = 0; i < KspSize; i++) {
178:       Sr[j*(N+1)+i] = Z[j*N+i];
179:     }
180:   }

182:   /* Broadcast Sr to all other processes to have consistent data;
183:    * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
184:   MPI_Bcast(Sr, (N+1)*r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp));
185:   /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
186:   if (agmres->DeflPrecond) {
187:     KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k);
188:   }
189:   *CurNeig = r; /* Number of extracted eigenvalues */
190:   return(0);

192: }

194: /*
195:  * This function form the matrices for the generalized eigenvalue problem,
196:  * it then compute the Schur vectors needed to augment the Newton basis.
197:  */
198: PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
199: {
200:   KSP_AGMRES     *agmres  = (KSP_AGMRES*)ksp->data;
201:   Vec            *U       = agmres->U;
202:   Vec            *TmpU    = agmres->TmpU;
203:   PetscScalar    *MatEigL = agmres->MatEigL;
204:   PetscScalar    *MatEigR = agmres->MatEigR;
205:   PetscScalar    *Sr      = agmres->Sr;
206:   PetscScalar    alpha, beta;
207:   PetscInt       i,j;
209:   PetscInt       max_k = agmres->max_k;     /* size of the non - augmented subspace */
210:   PetscInt       CurNeig;       /* Current number of extracted eigenvalues */
211:   PetscInt       N        = MAXKSPSIZE;
212:   PetscBLASInt   bN;
213:   PetscInt       lC       = N + 1;
214:   PetscInt       KspSize  = KSPSIZE;
215:   PetscBLASInt   blC,bKspSize;
216:   PetscInt       PrevNeig = agmres->r;

219:   PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);
220:   if (agmres->neig <= 1) return(0);
221:   /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
222:   alpha = 1.0;
223:   beta  = 0.0;
224:   PetscBLASIntCast(KspSize,&bKspSize);
225:   PetscBLASIntCast(lC,&blC);
226:   PetscBLASIntCast(N,&bN);
227:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
228:   if (!agmres->ritz) {
229:     /* Form TmpU = V*H where V is the Newton basis orthogonalized  with roddec*/
230:     for (j = 0; j < KspSize; j++) {
231:       /* Apply the elementary reflectors (stored in Qloc) on H */
232:       KSPAGMRESRodvec(ksp, KspSize+1, &agmres->hes_origin[j*lC], TmpU[j]);
233:     }
234:     /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
235:     for (j = 0; j < max_k; j++) {
236:       VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j*N]);
237:     }
238:     for (j = max_k; j < KspSize; j++) {
239:       VecMDot(U[j-max_k], KspSize, TmpU, &MatEigR[j*N]);
240:     }
241:   } else { /* Form H^T */
242:     for (j = 0; j < N; j++) {
243:       for (i = 0; i < N; i++) {
244:         MatEigR[j*N+i] = agmres->hes_origin[i*lC+j];
245:       }
246:     }
247:   }
248:   /* Obtain the Schur form of  the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
249:   if (agmres->DeflPrecond) {
250:     KSPAGMRESSchurForm(ksp, KspSize, agmres->hes_origin, lC, agmres->Rloc, lC, PETSC_TRUE, Sr, &CurNeig);
251:   } else {
252:     KSPAGMRESSchurForm(ksp, KspSize, MatEigL, N, MatEigR, N, PETSC_FALSE, Sr, &CurNeig);
253:   }

255:   if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
256:     agmres->HasSchur = PETSC_TRUE;
257:     KSPDGMRESComputeDeflationData(ksp, &CurNeig);
258:     return(0);
259:   }
260:   /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
261:   for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
262:     VecCopy(U[j], TmpU[j]);
263:   }

265:   for (j = 0; j < CurNeig; j++) {
266:     VecZeroEntries(U[j]);
267:     VecMAXPY(U[j], max_k, &Sr[j*(N+1)], &VEC_V(0));
268:     VecMAXPY(U[j], PrevNeig, &Sr[j*(N+1)+max_k], TmpU);
269:   }
270:   agmres->r = CurNeig;
271:   PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);
272:   return(0);
273: }