Actual source code: hz.c
slepc-3.7.1 2016-05-27
1: /*
2: HZ iteration for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from David Watkins.
5: References:
7: [1] D.S. Watkins, The Matrix Eigenvalue Problem: GR and Krylov Subspace
8: Methods, SIAM, 2007.
10: [2] M.A. Brebner, J. Grad, "Eigenvalues of Ax = lambda Bx for real
11: symmetric matrices A and B computed by reduction to pseudosymmetric
12: form and the HR process", Linear Alg. Appl. 43:99-118, 1982.
14: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
15: SLEPc - Scalable Library for Eigenvalue Problem Computations
16: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
18: This file is part of SLEPc.
20: SLEPc is free software: you can redistribute it and/or modify it under the
21: terms of version 3 of the GNU Lesser General Public License as published by
22: the Free Software Foundation.
24: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
25: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
27: more details.
29: You should have received a copy of the GNU Lesser General Public License
30: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
31: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
32: */
33: #include <slepc/private/dsimpl.h>
34: #include <slepcblaslapack.h>
38: /*
39: Sets up a 2-by-2 matrix to eliminate y in the vector [x y]'.
40: Transformation is rotator if sygn = 1 and hyperbolic if sygn = -1.
41: */
42: static PetscErrorCode UnifiedRotation(PetscReal x,PetscReal y,PetscReal sygn,PetscReal *rot,PetscReal *rcond,PetscBool *swap)
43: {
44: PetscReal nrm,c,s;
47: *swap = PETSC_FALSE;
48: if (y == 0) {
49: rot[0] = 1.0; rot[1] = 0.0; rot[2] = 0.0; rot[3] = 1.0;
50: *rcond = 1.0;
51: } else {
52: nrm = PetscMax(PetscAbs(x),PetscAbs(y));
53: c = x/nrm; s = y/nrm;
54: if (sygn == 1.0) { /* set up a rotator */
55: nrm = PetscSqrtReal(c*c+s*s);
56: c = c/nrm; s = s/nrm;
57: /* rot = [c s; -s c]; */
58: rot[0] = c; rot[1] = -s; rot[2] = s; rot[3] = c;
59: *rcond = 1.0;
60: } else if (sygn == -1) { /* set up a hyperbolic transformation */
61: nrm = c*c-s*s;
62: if (nrm > 0) nrm = PetscSqrtReal(nrm);
63: else if (nrm < 0) {
64: nrm = PetscSqrtReal(-nrm);
65: *swap = PETSC_TRUE;
66: } else /* breakdown */
67: SETERRQ(PETSC_COMM_SELF,1,"Breakdown in construction of hyperbolic transformation");
68: c = c/nrm; s = s/nrm;
69: /* rot = [c -s; -s c]; */
70: rot[0] = c; rot[1] = -s; rot[2] = -s; rot[3] = c;
71: *rcond = PetscAbs(PetscAbs(s)-PetscAbs(c))/(PetscAbs(s)+PetscAbs(c));
72: } else SETERRQ(PETSC_COMM_SELF,1,"Value of sygn sent to transetup must be 1 or -1");
73: }
74: return(0);
75: }
79: static PetscErrorCode HZStep(PetscBLASInt ntop,PetscBLASInt nn,PetscReal tr,PetscReal dt,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscInt n,PetscInt ld,PetscBool *flag)
80: {
82: PetscBLASInt one=1;
83: PetscInt k,jj,ii;
84: PetscBLASInt n_;
85: PetscReal bulge10,bulge20,bulge30,bulge31,bulge41,bulge42;
86: PetscReal sygn,rcond=1.0,worstcond,rot[4],buf[2],t;
87: PetscScalar rtmp;
88: PetscBool swap;
91: worstcond = 1.0;
92: PetscBLASIntCast(n,&n_);
94: /* Build initial bulge that sets step in motion */
95: bulge10 = dd[ntop+1]*(aa[ntop]*(aa[ntop] - dd[ntop]*tr) + dt*dd[ntop]*dd[ntop]) + dd[ntop]*bb[ntop]*bb[ntop];
96: bulge20 = bb[ntop]*(dd[ntop+1]*aa[ntop] + dd[ntop]*aa[ntop+1] - dd[ntop]*dd[ntop+1]*tr);
97: bulge30 = bb[ntop]*bb[ntop+1]*dd[ntop];
98: bulge31 = 0.0;
99: bulge41 = 0.0;
100: bulge42 = 0.0;
102: /* Chase the bulge */
103: for (jj=ntop;jj<nn-1;jj++) {
105: /* Check for trivial bulge */
106: if (jj>ntop && PetscMax(PetscMax(PetscAbs(bulge10),PetscAbs(bulge20)),PetscAbs(bulge30))<PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj]) + PetscAbs(aa[jj+1]))) {
107: bb[jj-1] = 0.0; /* deflate and move on */
109: } else { /* carry out the step */
111: /* Annihilate tip entry bulge30 */
112: if (bulge30 != 0.0) {
114: /* Make an interchange if necessary to ensure that the
115: first transformation is othogonal, not hyperbolic. */
116: if (dd[jj+1] != dd[jj+2]) { /* make an interchange */
117: if (dd[jj] != dd[jj+1]) { /* interchange 1st and 2nd */
118: buf[0] = bulge20; bulge20 = bulge10; bulge10 = buf[0];
119: buf[0] = aa[jj]; aa[jj] = aa[jj+1]; aa[jj+1] = buf[0];
120: buf[0] = bb[jj+1]; bb[jj+1] = bulge31; bulge31 = buf[0];
121: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
122: for (k=0;k<n;k++) {
123: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+1)*ld]; uu[k+(jj+1)*ld] = rtmp;
124: }
125: } else { /* interchange 1st and 3rd */
126: buf[0] = bulge30; bulge30 = bulge10; bulge10 = buf[0];
127: buf[0] = aa[jj]; aa[jj] = aa[jj+2]; aa[jj+2] = buf[0];
128: buf[0] = bb[jj]; bb[jj] = bb[jj+1]; bb[jj+1] = buf[0];
129: buf[0] = dd[jj]; dd[jj] = dd[jj+2]; dd[jj+2] = buf[0];
130: if (jj + 2 < nn-1) {
131: bulge41 = bb[jj+2];
132: bb[jj+2] = 0;
133: }
134: for (k=0;k<n;k++) {
135: rtmp = uu[k+jj*ld]; uu[k+jj*ld] = uu[k+(jj+2)*ld]; uu[k+(jj+2)*ld] = rtmp;
136: }
137: }
138: }
140: /* Set up transforming matrix rot. */
141: UnifiedRotation(bulge20,bulge30,1,rot,&rcond,&swap);
143: /* Apply transforming matrix rot to T. */
144: bulge20 = rot[0]*bulge20 + rot[2]*bulge30;
145: buf[0] = rot[0]*bb[jj] + rot[2]*bulge31;
146: buf[1] = rot[1]*bb[jj] + rot[3]*bulge31;
147: bb[jj] = buf[0];
148: bulge31 = buf[1];
149: buf[0] = rot[0]*rot[0]*aa[jj+1] + 2.0*rot[0]*rot[2]*bb[jj+1] + rot[2]*rot[2]*aa[jj+2];
150: buf[1] = rot[1]*rot[1]*aa[jj+1] + 2.0*rot[3]*rot[1]*bb[jj+1] + rot[3]*rot[3]*aa[jj+2];
151: bb[jj+1] = rot[1]*rot[0]*aa[jj+1] + rot[3]*rot[2]*aa[jj+2] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj+1];
152: aa[jj+1] = buf[0];
153: aa[jj+2] = buf[1];
154: if (jj + 2 < nn-1) {
155: bulge42 = bb[jj+2]*rot[2];
156: bb[jj+2] = bb[jj+2]*rot[3];
157: }
159: /* Accumulate transforming matrix */
160: PetscStackCallBLAS("BLASrot",BLASrot_(&n_,uu+(jj+1)*ld,&one,uu+(jj+2)*ld,&one,&rot[0],&rot[2]));
161: }
163: /* Annihilate inner entry bulge20 */
164: if (bulge20 != 0.0) {
166: /* Begin by setting up transforming matrix rot */
167: sygn = dd[jj]*dd[jj+1];
168: UnifiedRotation(bulge10,bulge20,sygn,rot,&rcond,&swap);
169: if (rcond<PETSC_MACHINE_EPSILON) {
170: SETERRQ1(PETSC_COMM_SELF,0,"Transforming matrix is numerically singular rcond=%g",rcond);
171: *flag = PETSC_TRUE;
172: return(0);
173: }
174: if (rcond < worstcond) worstcond = rcond;
176: /* Apply transforming matrix rot to T */
177: if (jj > ntop) bb[jj-1] = rot[0]*bulge10 + rot[2]*bulge20;
178: buf[0] = rot[0]*rot[0]*aa[jj] + 2*rot[0]*rot[2]*bb[jj] + rot[2]*rot[2]*aa[jj+1];
179: buf[1] = rot[1]*rot[1]*aa[jj] + 2*rot[3]*rot[1]*bb[jj] + rot[3]*rot[3]*aa[jj+1];
180: bb[jj] = rot[1]*rot[0]*aa[jj] + rot[3]*rot[2]*aa[jj+1] + (rot[3]*rot[0] + rot[1]*rot[2])*bb[jj];
181: aa[jj] = buf[0];
182: aa[jj+1] = buf[1];
183: if (jj + 1 < nn-1) {
184: /* buf = [ bulge31 bb(jj+1) ] * rot' */
185: buf[0] = rot[0]*bulge31 + rot[2]*bb[jj+1];
186: buf[1] = rot[1]*bulge31 + rot[3]*bb[jj+1];
187: bulge31 = buf[0];
188: bb[jj+1] = buf[1];
189: }
190: if (jj + 2 < nn-1) {
191: /* buf = [bulge41 bulge42] * rot' */
192: buf[0] = rot[0]*bulge41 + rot[2]*bulge42;
193: buf[1] = rot[1]*bulge41 + rot[3]*bulge42;
194: bulge41 = buf[0];
195: bulge42 = buf[1];
196: }
198: /* Apply transforming matrix rot to D */
199: if (swap == 1) {
200: buf[0] = dd[jj]; dd[jj] = dd[jj+1]; dd[jj+1] = buf[0];
201: }
203: /* Accumulate transforming matrix, uu(jj:jj+1,:) = rot*uu(jj:jj+1,:) */
204: if (sygn==1) {
205: PetscStackCallBLAS("BLASrot",BLASrot_(&n_,uu+jj*ld,&one,uu+(jj+1)*ld,&one,&rot[0],&rot[2]));
206: } else {
207: if (PetscAbsReal(rot[0])>PetscAbsReal(rot[1])) { /* Type I */
208: t = rot[1]/rot[0];
209: for (ii=0;ii<n;ii++) {
210: uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
211: uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + uu[(jj+1)*ld+ii]/rot[0];
212: }
213: } else { /* Type II */
214: t = rot[0]/rot[1];
215: for (ii=0;ii<n;ii++) {
216: rtmp = uu[jj*ld+ii];
217: uu[jj*ld+ii] = rot[0]*uu[jj*ld+ii] + rot[1]*uu[(jj+1)*ld+ii];
218: uu[(jj+1)*ld+ii] = t*uu[jj*ld+ii] + rtmp/rot[1];
219: }
220: }
221: }
222: }
223: }
225: /* Adjust bulge for next step */
226: bulge10 = bb[jj];
227: bulge20 = bulge31;
228: bulge30 = bulge41;
229: bulge31 = bulge42;
230: bulge41 = 0.0;
231: bulge42 = 0.0;
232: }
233: return(0);
234: }
238: static PetscErrorCode HZIteration(PetscBLASInt nn,PetscBLASInt cgd,PetscReal *aa,PetscReal *bb,PetscReal *dd,PetscScalar *uu,PetscBLASInt ld)
239: {
241: PetscBLASInt j2,one=1;
242: PetscInt its,nits,nstop,jj,ntop,nbot,ntry;
243: PetscReal htr,det,dis,dif,tn,kt,c,s,tr,dt;
244: PetscBool flag=PETSC_FALSE;
247: its = 0;
248: nbot = nn-1;
249: nits = 0;
250: nstop = 40*(nn - cgd);
252: while (nbot >= cgd && nits < nstop) {
254: /* Check for zeros on the subdiagonal */
255: jj = nbot - 1;
256: while (jj>=cgd && PetscAbs(bb[jj])>PETSC_MACHINE_EPSILON*(PetscAbs(aa[jj])+PetscAbs(aa[jj+1]))) jj = jj-1;
257: if (jj>=cgd) bb[jj]=0;
258: ntop = jj + 1; /* starting point for step */
259: if (ntop == nbot) { /* isolate single eigenvalue */
260: nbot = ntop - 1;
261: its = 0;
262: } else if (ntop+1 == nbot) { /* isolate pair of eigenvalues */
263: htr = 0.5*(aa[ntop]*dd[ntop] + aa[nbot]*dd[nbot]);
264: det = dd[ntop]*dd[nbot]*(aa[ntop]*aa[nbot]-bb[ntop]*bb[ntop]);
265: dis = htr*htr - det;
266: if (dis > 0) { /* distinct real eigenvalues */
267: if (dd[ntop] == dd[nbot]) { /* separate the eigenvalues by a Jacobi rotator */
268: dif = aa[ntop]-aa[nbot];
269: if (2.0*PetscAbs(bb[ntop])<=dif) {
270: tn = 2*bb[ntop]/dif;
271: tn = tn/(1.0 + PetscSqrtScalar(1.0+tn*tn));
272: } else {
273: kt = dif/(2.0*bb[ntop]);
274: tn = PetscSign(kt)/(PetscAbs(kt)+PetscSqrtScalar(1.0+kt*kt));
275: }
276: c = 1.0/PetscSqrtScalar(1.0 + tn*tn);
277: s = c*tn;
278: aa[ntop] = aa[ntop] + tn*bb[ntop];
279: aa[nbot] = aa[nbot] - tn*bb[ntop];
280: bb[ntop] = 0;
281: j2 = nn-cgd;
282: PetscStackCallBLAS("BLASrot",BLASrot_(&j2,uu+ntop*ld+cgd,&one,uu+nbot*ld+cgd,&one,&c,&s));
283: }
284: }
285: nbot = ntop - 1;
286: } else { /* Do an HZ iteration */
287: its = its + 1;
288: nits = nits + 1;
289: tr = aa[nbot-1]*dd[nbot-1] + aa[nbot]*dd[nbot];
290: dt = dd[nbot-1]*dd[nbot]*(aa[nbot-1]*aa[nbot]-bb[nbot-1]*bb[nbot-1]);
291: for (ntry=1;ntry<=6;ntry++) {
292: HZStep(ntop,nbot+1,tr,dt,aa,bb,dd,uu,nn,ld,&flag);
293: if (!flag) break;
294: else if (ntry == 6) SETERRQ(PETSC_COMM_SELF,1,"Unable to complete hz step on six tries");
295: else {
296: tr = 0.9*tr; dt = 0.81*dt;
297: }
298: }
299: }
300: }
301: return(0);
302: }
306: PetscErrorCode DSSolve_GHIEP_HZ(DS ds,PetscScalar *wr,PetscScalar *wi)
307: {
309: PetscInt off;
310: PetscBLASInt n1,ld;
311: PetscScalar *A,*B,*Q;
312: PetscReal *d,*e,*s;
313: #if defined(PETSC_USE_COMPLEX)
314: PetscInt i;
315: #endif
318: #if !defined(PETSC_USE_COMPLEX)
320: #endif
321: PetscBLASIntCast(ds->ld,&ld);
322: n1 = ds->n - ds->l;
323: off = ds->l + ds->l*ld;
324: A = ds->mat[DS_MAT_A];
325: B = ds->mat[DS_MAT_B];
326: Q = ds->mat[DS_MAT_Q];
327: d = ds->rmat[DS_MAT_T];
328: e = ds->rmat[DS_MAT_T] + ld;
329: s = ds->rmat[DS_MAT_D];
330: /* Quick return */
331: if (n1 == 1) {
332: *(Q+off) = 1;
333: if (ds->compact) {
334: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
335: } else {
336: d[ds->l] = PetscRealPart(A[off]); s[ds->l] = PetscRealPart(B[off]);
337: wr[ds->l] = d[ds->l]/s[ds->l]; wi[ds->l] = 0.0;
338: }
339: return(0);
340: }
341: /* Reduce to pseudotriadiagonal form */
342: DSIntermediate_GHIEP(ds);
343: HZIteration(ds->n,ds->l,d,e,s,Q,ld);
344: if (!ds->compact) {
345: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
346: }
347: /* Undo from diagonal the blocks whith real eigenvalues*/
348: DSGHIEPRealBlocks(ds);
350: /* Recover eigenvalues from diagonal */
351: DSGHIEPComplexEigs(ds,0,ds->n,wr,wi);
352: #if defined(PETSC_USE_COMPLEX)
353: if (wi) {
354: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
355: }
356: #endif
357: ds->t = ds->n;
358: return(0);
359: }