Actual source code: dmerg2.c

slepc-3.7.1 2016-05-27
Report Typos and Errors
  1: /*
  2:    BDC - Block-divide and conquer (see description in README file).

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/dsimpl.h>
 25: #include <slepcblaslapack.h>

 27: PetscErrorCode BDC_dmerg2_(const char *jobz,PetscBLASInt rkct,PetscBLASInt n, 
 28:         PetscReal *ev,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, 
 29:         PetscReal *rho,PetscReal *u,PetscBLASInt sbrkp1,PetscReal *v, 
 30:         PetscBLASInt sbrk,PetscBLASInt cutpnt,PetscReal *work,PetscBLASInt lwork, 
 31:         PetscBLASInt *iwork,PetscReal tol,PetscBLASInt *info,PetscBLASInt jobz_len)
 32: {
 33: /*  -- Routine written in LAPACK Version 3.0 style -- */
 34: /* *************************************************** */
 35: /*     Written by */
 36: /*     Michael Moldaschl and Wilfried Gansterer */
 37: /*     University of Vienna */
 38: /*     last modification: March 16, 2014 */

 40: /*     Small adaptations of original code written by */
 41: /*     Wilfried Gansterer and Bob Ward, */
 42: /*     Department of Computer Science, University of Tennessee */
 43: /*     see http://dx.doi.org/10.1137/S1064827501399432 */
 44: /* *************************************************** */

 46: /*  Purpose */
 47: /*  ======= */

 49: /*  DMERG2 computes the updated eigensystem of a diagonal matrix after */
 50: /*  modification by a rank-one symmetric matrix.  The diagonal matrix */
 51: /*  consists of two diagonal submatrices, and the vectors defining the */
 52: /*  rank-1 matrix similarly have two underlying subvectors each. */
 53: /*  The dimension of the first subproblem is CUTPNT, the dimension of */
 54: /*  the second subproblem is N-CUTPNT. */

 56: /*  T = Q(in) ( EV(in) + RHO * Z*Z' ) Q'(in) = Q(out) * EV(out) * Q'(out) */

 58: /*     where Z = Q'[V U']', where V is a row vector and U is a column */
 59: /*     vector with dimensions corresponding to the two underlying */
 60: /*     subproblems. */

 62: /*     The eigenvectors of the original matrix are stored in Q, and the */
 63: /*     eigenvalues in EV.  The algorithm consists of three stages: */

 65: /*        The first stage consists of deflating the size of the problem */
 66: /*        when there are multiple eigenvalues or if there is a zero in */
 67: /*        the Z vector.  For each such occurrence the dimension of the */
 68: /*        secular equation problem is reduced by one.  This stage is */
 69: /*        performed by the routine DSRTDF. */

 71: /*        The second stage consists of calculating the updated */
 72: /*        eigenvalues. This is done by finding the roots of the secular */
 73: /*        equation via the routine DLAED4 (as called by DLAED3M). */
 74: /*        This routine also calculates the eigenvectors of the current */
 75: /*        problem. */

 77: /*        If( JOBZ.EQ.'D' ) then the final stage consists */
 78: /*        of computing the updated eigenvectors directly using the updated */
 79: /*        eigenvalues. The eigenvectors for the current problem are multiplied */
 80: /*        with the eigenvectors from the overall problem. */

 82: /*  Arguments */
 83: /*  ========= */

 85: /*  JOBZ   (input) CHARACTER*1 */
 86: /*          = 'N': Compute eigenvalues only (not implemented); */
 87: /*          = 'D': Compute eigenvalues and eigenvectors. */
 88: /*                 Eigenvectors are accumulated in the divide-and-conquer */
 89: /*                 process. */

 91: /*  RKCT   (input) INTEGER */
 92: /*         The number of the rank modification which is accounted for */
 93: /*         (RKCT >= 1). Required parameter, because the update operation of the */
 94: /*         modification vector can be performed much more efficiently */
 95: /*         if RKCT.EQ.1. In that case, the eigenvector matrix is still */
 96: /*         block-diagonal. For RKCT.GE.2 the eigenvector matrix for the update */
 97: /*         operation has filled up and is a full matrix. */

 99: /*  N      (input) INTEGER */
100: /*         The dimension of the symmetric block tridiagonal matrix. */
101: /*         N >= 0. */

103: /*  EV     (input/output) DOUBLE PRECISION array, dimension (N) */
104: /*         On entry, the eigenvalues (=diagonal values) of the */
105: /*         rank-1-perturbed matrix. */
106: /*         On exit, the eigenvalues of the repaired matrix. */

108: /*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
109: /*         On entry, the eigenvectors of the rank-1-perturbed matrix. */
110: /*         On exit, the eigenvectors of the repaired tridiagonal matrix. */

112: /*  LDQ    (input) INTEGER */
113: /*         The leading dimension of the array Q.  LDQ >= max(1,N). */

115: /*  INDXQ  (input/output) INTEGER array, dimension (N) */
116: /*         On entry, the permutation which separately sorts the two */
117: /*         subproblems in EV into ascending order. */
118: /*         On exit, the permutation which will reintegrate the */
119: /*         subproblems back into sorted order, */
120: /*         i.e. EV( INDXQ( I = 1, N ) ) will be in ascending order. */

122: /*  RHO    (input/output) DOUBLE PRECISION */
123: /*         The scalar in the rank-1 perturbation. Modified (multiplied */
124: /*         by 2) in DSRTDF. */

126: /*  U      (input) DOUBLE PRECISION array; dimension (SBRKP1), where SBRKP1 */
127: /*         is the size of the first (original) block after CUTPNT. */
128: /*         The column vector of the rank-1 subdiagonal connecting the */
129: /*         two diagonal subproblems. */
130: /*         Theoretically, zero entries might have to be appended after U */
131: /*         in order to make it have dimension (N-CUTPNT). However, this */
132: /*         is not required because it can be accounted for in the */
133: /*         matrix-vector product using the argument SBRKP1. */

135: /*  SBRKP1 (input) INTEGER */
136: /*         Dimension of the relevant (non-zero) part of the vector U. */
137: /*         Equal to the size of the first original block after the */
138: /*         breakpoint. */

140: /*  V      (input) DOUBLE PRECISION array; dimension (SBRK), where SBRK */
141: /*         is the size of the last (original) block before CUTPNT. */
142: /*         The row vector of the rank-1 subdiagonal connecting the two */
143: /*         diagonal subproblems. */
144: /*         Theoretically, zero entries might have to be inserted in front */
145: /*         of V in order to make it have dimension (CUTPNT). However, this */
146: /*         is not required because it can be accounted for in the */
147: /*         matrix-vector product using the argument SBRK. */

149: /*  SBRK   (input) INTEGER */
150: /*         Dimension of the relevant (non-zero) part of the vector V. */
151: /*         Equal to the size of the last original block before the */
152: /*         breakpoint. */

154: /*  CUTPNT (input) INTEGER */
155: /*         The location of the last eigenvalue of the leading diagonal */
156: /*         block.  min(1,N) <= CUTPNT <= max(1,N). */

158: /*  WORK   (workspace) DOUBLE PRECISION array, dimension (LWORK) */

160: /*  LWORK  (input) INTEGER */
161: /*         The dimension of the array WORK. */
162: /*         In order to guarantee correct results in all cases, */
163: /*         LWORK must be at least ( 2*N**2 + 3*N  ). In many cases, */
164: /*         less workspace is required. The absolute minimum required is */
165: /*         ( N**2 + 3*N ). */
166: /*         If the workspace provided is not sufficient, the routine will */
167: /*         return a corresponding error code and report how much workspace */
168: /*         was missing (see INFO). */
169: /*         NOTE: This parameter is needed for determining whether enough */
170: /*               workspace is provided, and, if not, for computing how */
171: /*               much workspace is needed. */

173: /*  IWORK  (workspace) INTEGER array, dimension ( 4*N ) */

175: /*  TOL    (input) DOUBLE PRECISION */
176: /*         User specified deflation tolerance for the routine DSRTDF. */

178: /*  INFO   (output) INTEGER */
179: /*          = 0:  successful exit. */
180: /*          < -200: not enough workspace */
181: /*                ABS(INFO + 200) numbers have to be stored in addition */
182: /*                to the workspace provided, otherwise some eigenvectors */
183: /*                will be incorrect. */
184: /*          < 0, > -99:  if INFO.EQ.-i, the i-th argument had an */
185: /*                       illegal value. */
186: /*          > 0:  if INFO.EQ.1, an eigenvalue did not converge */
187: /*                if INFO.EQ.2, the deflation counters DZ and DE do not sum */
188: /*                              up to the total number N-K of components */
189: /*                              deflated */

191: /*  Further Details */
192: /*  =============== */

194: /*  Based on code written by */
195: /*     Wilfried Gansterer and Bob Ward, */
196: /*     Department of Computer Science, University of Tennessee */

198: /*  Based on the design of the LAPACK code Dlaed1.f written by Jeff */
199: /*  Rutter, Computer Science Division, University of California at */
200: /*  Berkeley, and modified by Francoise Tisseur, University of Tennessee. */

202: /*  ===================================================================== */

204: #if defined(SLEPC_MISSING_LAPACK_LAMRG)
206:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAMRG - Lapack routine is unavailable");
207: #else
208:   PetscBLASInt   i, k, n1, n2, de, is, dz, iw, iz, iq2, nmc, cpp1;
209:   PetscBLASInt   indx, indxc, indxp, lwmin, idlmda;
210:   PetscBLASInt   spneed, coltyp, tmpcut, i__1, i__2, one=1, mone=-1;
211:   char           defl[1];
212:   PetscReal      done = 1.0, dzero = 0.0;

216:   *info = 0;
217:   lwmin = n*n + n * 3;

219:   if (n < 0) {
220:     *info = -3;
221:   } else if (ldq < PetscMax(1,n)) {
222:     *info = -6;
223:   } else if (cutpnt < PetscMin(1,n) || cutpnt > PetscMax(1,n)) {
224:     *info = -13;
225:   } else if (lwork < lwmin) {
226:     *info = -15;
227:   }
228:   if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DMERG2",-(*info));

230: /* **************************************************************************** */

232:   /* Quick return if possible */

234:   if (n == 0) return(0);

236: /* **************************************************************************** */

238:   /* The following values are integer pointers which indicate */
239:   /* the portion of the workspace used by a particular array in DSRTDF */
240:   /* and DLAED3M. */

242:   iz = 0;
243:   idlmda = iz + n;
244:   iw = idlmda + n;
245:   iq2 = iw + n;
246:   is = iq2 + n * n;

248:   /* After the pointer IS the matrix S is stored and read in WORK */
249:   /* in the routine DLAED3M. */

251:   indx = 0;
252:   indxc = indx + n;
253:   coltyp = indxc + n;
254:   indxp = coltyp + n;

256:   /* If eigenvectors are to be accumulated in the divide-and-conquer */
257:   /* process ( JOBZ.EQ.'D' ) form the z-vector which consists of */
258:   /* Q_1^T * V and Q_2^T * U. */

260:   cpp1 = cutpnt + 1;
261:   if (rkct == 1) {

263:     /* for the first rank modification the eigenvector matrix has */
264:     /* special block-diagonal structure and therefore Q_1^T * V and */
265:     /* Q_2^T * U can be formed separately. */

267:     PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &cutpnt, &done,
268:               &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
269:     nmc = n - cutpnt;
270:     PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &nmc, &done,
271:               &q[cpp1-1 + (cpp1-1)*ldq], &ldq, u,
272:               &one, &dzero, &work[iz + cutpnt], &one));

274:   } else {

276:     /* for the higher rank modifications, the vectors V and U */
277:     /* have to be multiplied with the full eigenvector matrix */

279:     PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &n, &done,
280:               &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
281:     PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &n, &done, &q[cpp1-1],
282:               &ldq, u, &one, &done, &work[iz], &one));

284:   }

286: /* **************************************************************************** */

288:   /* Deflate eigenvalues. */

290:   if (rkct == 1) {

292:     /* for the first rank modification we need the actual cutpoint */

294:     n1 = cutpnt;
295:     tmpcut = cutpnt;
296:   } else {

298:     /* for the later rank modifications there is no actual cutpoint any more */

300:     n1 = n;

302:     /* The original value of CUTPNT has to be preserved for the next time */
303:     /* this subroutine is called (therefore, CUTPNT is an INPUT parameter */
304:     /* and not to be changed). Thus, assign N to TMPCUT and use the local */
305:     /* variable TMPCUT from now on for the cut point. */

307:     tmpcut = n;
308:   }

310:   /* initialize the flag DEFL (indicates whether deflation occurred - */
311:   /* this information is needed later in DLAED3M) */

313:   *(unsigned char *)defl = '0';

315:   /* call DSRTDF for deflation */

317:   BDC_dsrtdf_(&k, n, n1, ev, q, ldq, indxq, rho, &work[iz],
318:           &work[idlmda], &work[iw], &work[iq2], &iwork[indx],
319:           &iwork[indxc], &iwork[indxp], &iwork[coltyp], tol, &dz, &de, info);
320:           
321:   if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: error in dsrtdf, info = %d",*info);

323:   if (k < n) {

325:    /* ....some deflation occurred in dsrtdf, set the flag DEFL */
326:    /*     (needed in DLAED3M.f, since Givens rotations need to be */
327:    /*     applied to the eigenvector matrix only if some deflation */
328:    /*     happened) */

330:     *(unsigned char *)defl = '1';
331:   }

333: /* **************************************************************************** */

335:   /* Solve the Secular Equation. */

337:   if (k != 0 || k == 0) {

339:     /* ....not everything was deflated. */
340:      
341:     /* ....check whether enough workspace is available: */
342:      
343:     /* Note that the following (upper) bound SPNEED for the workspace */
344:     /* requirements should also hold in the extreme case TMPCUT=N, */
345:     /* which happens for every rank modification after the first one. */

347:     i__1 = (iwork[coltyp] + iwork[coltyp+1]) * k;
348:     i__2 = (iwork[coltyp+1] + iwork[coltyp + 2]) * k;
349:     spneed = is + PetscMax(i__1,i__2) - 1;

351:     if (spneed > lwork) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: Workspace needed exceeds the workspace provided by %d numbers",spneed-lwork);

353:     /* calling DLAED3M for solving the secular equation. */

355:     BDC_dlaed3m_(jobz, defl, k, n, tmpcut, ev, q, ldq, 
356:                 *rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp],
357:                 &work[iw], &work[is], info, 1, 1);
358:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: error in dlaed3m, info = %d",*info);

360:     /* Prepare the INDXQ sorting permutation. */

362:     n1 = k;
363:     n2 = n - k;
364:     PetscStackCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, ev, &one, &mone, indxq));
365:     if (k == 0) {
366:       for (i = 0; i < n; ++i) indxq[i] = i+1;
367:     }

369:   } else {

371:     /* ....everything was deflated (K.EQ.0) */

373:     for (i = 0; i < n; ++i) indxq[i] = i+1;
374:   }
375:   return(0);
376: #endif
377: }