/* bvlapack.c — slepc-3.13.0 (2020-03-31) */
/*
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   SLEPc - Scalable Library for Eigenvalue Problem Computations
   Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain

   This file is part of SLEPc.
   SLEPc is distributed under a 2-clause BSD license (see LICENSE).
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/*
   BV private kernels that use the LAPACK
*/

#include <slepc/private/bvimpl.h>
#include <slepcblaslapack.h>
17: /*
18: Compute ||A|| for an mxn matrix
19: */
20: PetscErrorCode BVNorm_LAPACK_Private(BV bv,PetscInt m_,PetscInt n_,const PetscScalar *A,NormType type,PetscReal *nrm,PetscBool mpi)
21: {
23: PetscBLASInt m,n,i,j;
24: PetscMPIInt len;
25: PetscReal lnrm,*rwork=NULL,*rwork2=NULL;
28: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
29: PetscBLASIntCast(m_,&m);
30: PetscBLASIntCast(n_,&n);
31: if (type==NORM_FROBENIUS || type==NORM_2) {
32: lnrm = LAPACKlange_("F",&m,&n,(PetscScalar*)A,&m,rwork);
33: if (mpi) {
34: lnrm = lnrm*lnrm;
35: MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
36: *nrm = PetscSqrtReal(*nrm);
37: } else *nrm = lnrm;
38: PetscLogFlops(2.0*m*n);
39: } else if (type==NORM_1) {
40: if (mpi) {
41: BVAllocateWork_Private(bv,2*n_);
42: rwork = (PetscReal*)bv->work;
43: rwork2 = rwork+n_;
44: PetscArrayzero(rwork,n_);
45: PetscArrayzero(rwork2,n_);
46: for (j=0;j<n_;j++) {
47: for (i=0;i<m_;i++) {
48: rwork[j] += PetscAbsScalar(A[i+j*m_]);
49: }
50: }
51: PetscMPIIntCast(n_,&len);
52: MPI_Allreduce(rwork,rwork2,len,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
53: *nrm = 0.0;
54: for (j=0;j<n_;j++) if (rwork2[j] > *nrm) *nrm = rwork2[j];
55: } else {
56: *nrm = LAPACKlange_("O",&m,&n,(PetscScalar*)A,&m,rwork);
57: }
58: PetscLogFlops(1.0*m*n);
59: } else if (type==NORM_INFINITY) {
60: BVAllocateWork_Private(bv,m_);
61: rwork = (PetscReal*)bv->work;
62: lnrm = LAPACKlange_("I",&m,&n,(PetscScalar*)A,&m,rwork);
63: if (mpi) {
64: MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)bv));
65: } else *nrm = lnrm;
66: PetscLogFlops(1.0*m*n);
67: }
68: PetscFPTrapPop();
69: return(0);
70: }
72: /*
73: Compute the upper Cholesky factor in R and its inverse in S.
74: If S == R then the inverse overwrites the Cholesky factor.
75: */
76: PetscErrorCode BVMatCholInv_LAPACK_Private(BV bv,Mat R,Mat S)
77: {
79: PetscInt i,k,l,n,m,ld,lds;
80: PetscScalar *pR,*pS;
81: PetscBLASInt info,n_,m_,ld_,lds_;
84: l = bv->l;
85: k = bv->k;
86: MatGetSize(R,&m,NULL);
87: n = k-l;
88: PetscBLASIntCast(m,&m_);
89: PetscBLASIntCast(n,&n_);
90: ld = m;
91: ld_ = m_;
92: MatDenseGetArray(R,&pR);
94: if (S==R) {
95: BVAllocateWork_Private(bv,m*k);
96: pS = bv->work;
97: lds = ld;
98: lds_ = ld_;
99: } else {
100: MatDenseGetArray(S,&pS);
101: MatGetSize(S,&lds,NULL);
102: PetscBLASIntCast(lds,&lds_);
103: }
105: /* save a copy of matrix in S */
106: for (i=l;i<k;i++) {
107: PetscArraycpy(pS+i*lds+l,pR+i*ld+l,n);
108: }
110: /* compute upper Cholesky factor in R */
111: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
112: PetscLogFlops((1.0*n*n*n)/3.0);
114: if (info) { /* LAPACKpotrf failed, retry on diagonally perturbed matrix */
115: for (i=l;i<k;i++) {
116: PetscArraycpy(pR+i*ld+l,pS+i*lds+l,n);
117: pR[i+i*ld] += 50.0*PETSC_MACHINE_EPSILON;
118: }
119: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
120: SlepcCheckLapackInfo("potrf",info);
121: PetscLogFlops((1.0*n*n*n)/3.0);
122: }
124: /* compute S = inv(R) */
125: if (S==R) {
126: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
127: } else {
128: PetscArrayzero(pS+l*lds,(k-l)*k);
129: for (i=l;i<k;i++) {
130: PetscArraycpy(pS+i*lds+l,pR+i*ld+l,n);
131: }
132: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
133: }
134: SlepcCheckLapackInfo("trtri",info);
135: PetscLogFlops(0.33*n*n*n);
137: /* Zero out entries below the diagonal */
138: for (i=l;i<k-1;i++) {
139: PetscArrayzero(pR+i*ld+i+1,(k-i-1));
140: if (S!=R) { PetscArrayzero(pS+i*lds+i+1,(k-i-1)); }
141: }
142: MatDenseRestoreArray(R,&pR);
143: if (S!=R) { MatDenseRestoreArray(S,&pS); }
144: return(0);
145: }
147: /*
148: Compute the inverse of an upper triangular matrix R, store it in S.
149: If S == R then the inverse overwrites R.
150: */
151: PetscErrorCode BVMatTriInv_LAPACK_Private(BV bv,Mat R,Mat S)
152: {
154: PetscInt i,k,l,n,m,ld,lds;
155: PetscScalar *pR,*pS;
156: PetscBLASInt info,n_,m_,ld_,lds_;
159: l = bv->l;
160: k = bv->k;
161: MatGetSize(R,&m,NULL);
162: n = k-l;
163: PetscBLASIntCast(m,&m_);
164: PetscBLASIntCast(n,&n_);
165: ld = m;
166: ld_ = m_;
167: MatDenseGetArray(R,&pR);
169: if (S==R) {
170: BVAllocateWork_Private(bv,m*k);
171: pS = bv->work;
172: lds = ld;
173: lds_ = ld_;
174: } else {
175: MatDenseGetArray(S,&pS);
176: MatGetSize(S,&lds,NULL);
177: PetscBLASIntCast(lds,&lds_);
178: }
180: /* compute S = inv(R) */
181: if (S==R) {
182: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
183: } else {
184: PetscArrayzero(pS+l*lds,(k-l)*k);
185: for (i=l;i<k;i++) {
186: PetscArraycpy(pS+i*lds+l,pR+i*ld+l,n);
187: }
188: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
189: }
190: SlepcCheckLapackInfo("trtri",info);
191: PetscLogFlops(0.33*n*n*n);
193: MatDenseRestoreArray(R,&pR);
194: if (S!=R) { MatDenseRestoreArray(S,&pS); }
195: return(0);
196: }
198: /*
199: Compute the matrix to be used for post-multiplying the basis in the SVQB
200: block orthogonalization method.
201: On input R = V'*V, on output S = D*U*Lambda^{-1/2} where (U,Lambda) is
202: the eigendecomposition of D*R*D with D=diag(R)^{-1/2}.
203: If S == R then the result overwrites R.
204: */
205: PetscErrorCode BVMatSVQB_LAPACK_Private(BV bv,Mat R,Mat S)
206: {
208: PetscInt i,j,k,l,n,m,ld,lds;
209: PetscScalar *pR,*pS,*D,*work,a;
210: PetscReal *eig,dummy;
211: PetscBLASInt info,lwork,n_,m_,ld_,lds_;
212: #if defined(PETSC_USE_COMPLEX)
213: PetscReal *rwork,rdummy;
214: #endif
217: l = bv->l;
218: k = bv->k;
219: MatGetSize(R,&m,NULL);
220: n = k-l;
221: PetscBLASIntCast(m,&m_);
222: PetscBLASIntCast(n,&n_);
223: ld = m;
224: ld_ = m_;
225: MatDenseGetArray(R,&pR);
227: if (S==R) {
228: pS = pR;
229: lds = ld;
230: lds_ = ld_;
231: } else {
232: MatDenseGetArray(S,&pS);
233: MatGetSize(S,&lds,NULL);
234: PetscBLASIntCast(lds,&lds_);
235: }
237: /* workspace query and memory allocation */
238: lwork = -1;
239: #if defined(PETSC_USE_COMPLEX)
240: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&rdummy,&info));
241: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
242: PetscMalloc4(n,&eig,n,&D,lwork,&work,PetscMax(1,3*n-2),&rwork);
243: #else
244: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&info));
245: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
246: PetscMalloc3(n,&eig,n,&D,lwork,&work);
247: #endif
249: /* copy and scale matrix */
250: for (i=l;i<k;i++) D[i-l] = 1.0/PetscSqrtReal(PetscRealPart(pR[i+i*ld]));
251: for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] = pR[i+j*ld]*D[i-l];
252: for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] *= D[j-l];
254: /* compute eigendecomposition */
255: #if defined(PETSC_USE_COMPLEX)
256: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,rwork,&info));
257: #else
258: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,&info));
259: #endif
260: SlepcCheckLapackInfo("syev",info);
262: if (S!=R) { /* R = U' */
263: for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] = pS[j+i*lds];
264: }
266: /* compute S = D*U*Lambda^{-1/2} */
267: for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] *= D[i-l];
268: for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] /= PetscSqrtReal(eig[j-l]);
270: if (S!=R) { /* compute R = inv(S) = Lambda^{1/2}*U'/D */
271: for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] *= PetscSqrtReal(eig[i-l]);
272: for (j=l;j<k;j++) for (i=l;i<k;i++) pR[i+j*ld] /= D[j-l];
273: }
275: #if defined(PETSC_USE_COMPLEX)
276: PetscFree4(eig,D,work,rwork);
277: #else
278: PetscFree3(eig,D,work);
279: #endif
280: PetscLogFlops(9.0*n*n*n);
282: MatDenseRestoreArray(R,&pR);
283: if (S!=R) { MatDenseRestoreArray(S,&pS); }
284: return(0);
285: }
287: /*
288: QR factorization of an mxn matrix via parallel TSQR
289: */
290: PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
291: {
293: PetscInt level,plevel,nlevels,powtwo,lda,worklen;
294: PetscBLASInt m,n,i,j,k,l,s,nb,sz,lwork,info;
295: PetscScalar *tau,*work,*A=NULL,*QQ=NULL,*Qhalf,*C=NULL,one=1.0,zero=0.0;
296: PetscMPIInt rank,size,count,stride;
297: MPI_Datatype tmat;
300: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
301: PetscBLASIntCast(m_,&m);
302: PetscBLASIntCast(n_,&n);
303: k = PetscMin(m,n);
304: nb = 16;
305: lda = 2*n;
306: MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
307: MPI_Comm_rank(PetscObjectComm((PetscObject)bv),&rank);
308: nlevels = (PetscInt)PetscCeilReal(PetscLog2Real((PetscReal)size));
309: powtwo = PetscPowInt(2,(PetscInt)PetscFloorReal(PetscLog2Real((PetscReal)size)));
310: worklen = n+n*nb;
311: if (nlevels) worklen += n*lda+n*lda*nlevels+n*lda;
312: BVAllocateWork_Private(bv,worklen);
313: tau = bv->work;
314: work = bv->work+n;
315: PetscBLASIntCast(n*nb,&lwork);
316: if (nlevels) {
317: A = bv->work+n+n*nb;
318: QQ = bv->work+n+n*nb+n*lda;
319: C = bv->work+n+n*nb+n*lda+n*lda*nlevels;
320: }
322: /* Compute QR */
323: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,Q,&m,tau,work,&lwork,&info));
324: SlepcCheckLapackInfo("geqrf",info);
326: /* Extract R */
327: if (R || nlevels) {
328: for (j=0;j<n;j++) {
329: for (i=0;i<=PetscMin(j,m-1);i++) {
330: if (nlevels) A[i+j*lda] = Q[i+j*m];
331: else R[i+j*ldr] = Q[i+j*m];
332: }
333: for (i=PetscMin(j,m-1)+1;i<n;i++) {
334: if (nlevels) A[i+j*lda] = 0.0;
335: else R[i+j*ldr] = 0.0;
336: }
337: }
338: }
340: /* Compute orthogonal matrix in Q */
341: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&m,&k,&k,Q,&m,tau,work,&lwork,&info));
342: SlepcCheckLapackInfo("orgqr",info);
344: if (nlevels) {
346: PetscMPIIntCast(n,&count);
347: PetscMPIIntCast(lda,&stride);
348: PetscBLASIntCast(lda,&l);
349: MPI_Type_vector(count,count,stride,MPIU_SCALAR,&tmat);
350: MPI_Type_commit(&tmat);
352: for (level=nlevels;level>=1;level--) {
354: plevel = PetscPowInt(2,level);
355: s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;
357: /* Stack triangular matrices */
358: if (rank<s && s<size) { /* send top part, receive bottom part */
359: MPI_Sendrecv(A,1,tmat,s,111,A+n,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
360: } else if (s<size) { /* copy top to bottom, receive top part */
361: MPI_Sendrecv(A,1,tmat,rank,111,A+n,1,tmat,rank,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
362: MPI_Sendrecv(A+n,1,tmat,s,111,A,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
363: }
364: if (level<nlevels && size!=powtwo) { /* for cases when size is not a power of 2 */
365: if (rank<size-powtwo) { /* send bottom part */
366: MPI_Send(A+n,1,tmat,rank+powtwo,111,PetscObjectComm((PetscObject)bv));
367: } else if (rank>=powtwo) { /* receive bottom part */
368: MPI_Recv(A+n,1,tmat,rank-powtwo,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
369: }
370: }
371: /* Compute QR and build orthogonal matrix */
372: if (level<nlevels || (level==nlevels && s<size)) {
373: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&l,&n,A,&l,tau,work,&lwork,&info));
374: SlepcCheckLapackInfo("geqrf",info);
375: PetscArraycpy(QQ+(level-1)*n*lda,A,n*lda);
376: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&l,&n,&n,QQ+(level-1)*n*lda,&l,tau,work,&lwork,&info));
377: SlepcCheckLapackInfo("orgqr",info);
378: for (j=0;j<n;j++) {
379: for (i=j+1;i<n;i++) A[i+j*lda] = 0.0;
380: }
381: } else if (level==nlevels) { /* only one triangular matrix, set Q=I */
382: PetscArrayzero(QQ+(level-1)*n*lda,n*lda);
383: for (j=0;j<n;j++) QQ[j+j*lda+(level-1)*n*lda] = 1.0;
384: }
385: }
387: /* Extract R */
388: if (R) {
389: for (j=0;j<n;j++) {
390: for (i=0;i<=j;i++) R[i+j*ldr] = A[i+j*lda];
391: for (i=j+1;i<n;i++) R[i+j*ldr] = 0.0;
392: }
393: }
395: /* Accumulate orthogonal matrices */
396: for (level=1;level<=nlevels;level++) {
397: plevel = PetscPowInt(2,level);
398: s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;
399: Qhalf = (rank<s)? QQ+(level-1)*n*lda: QQ+(level-1)*n*lda+n;
400: if (level<nlevels) {
401: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,QQ+level*n*lda,&l,Qhalf,&l,&zero,C,&l));
402: PetscArraycpy(QQ+level*n*lda,C,n*lda);
403: } else {
404: for (i=0;i<m/l;i++) {
405: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,Q+i*l,&m,Qhalf,&l,&zero,C,&l));
406: for (j=0;j<n;j++) { PetscArraycpy(Q+i*l+j*m,C+j*l,l); }
407: }
408: sz = m%l;
409: if (sz) {
410: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sz,&n,&n,&one,Q+(m/l)*l,&m,Qhalf,&l,&zero,C,&l));
411: for (j=0;j<n;j++) { PetscArraycpy(Q+(m/l)*l+j*m,C+j*l,sz); }
412: }
413: }
414: }
416: MPI_Type_free(&tmat);
417: }
419: PetscLogFlops(3.0*m*n*n);
420: PetscFPTrapPop();
421: return(0);
422: }
424: /*
425: Reduction operation to compute [~,Rout]=qr([Rin1;Rin2]) in the TSQR algorithm;
426: all matrices are upper triangular stored in packed format
427: */
428: SLEPC_EXTERN void MPIAPI SlepcGivensPacked(void *in,void *inout,PetscMPIInt *len,MPI_Datatype *datatype)
429: {
431: PetscBLASInt n,i,j,k,one=1;
432: PetscMPIInt tsize;
433: PetscScalar v,s,*R2=(PetscScalar*)in,*R1=(PetscScalar*)inout;
434: PetscReal c;
437: MPI_Type_size(*datatype,&tsize);CHKERRABORT(PETSC_COMM_SELF,ierr); /* we assume len=1 */
438: tsize /= sizeof(PetscScalar);
439: n = (-1+(PetscBLASInt)PetscSqrtReal(1+8*tsize))/2;
440: for (j=0;j<n;j++) {
441: for (i=0;i<=j;i++) {
442: LAPACKlartg_(R1+(2*n-j-1)*j/2+j,R2+(2*n-i-1)*i/2+j,&c,&s,&v);
443: R1[(2*n-j-1)*j/2+j] = v;
444: k = n-j-1;
445: if (k) BLASrot_(&k,R1+(2*n-j-1)*j/2+j+1,&one,R2+(2*n-i-1)*i/2+j+1,&one,&c,&s);
446: }
447: }
448: PetscFunctionReturnVoid();
449: }
451: /*
452: Computes the R factor of the QR factorization of an mxn matrix via parallel TSQR
453: */
454: PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
455: {
457: PetscInt worklen;
458: PetscBLASInt m,n,i,j,s,nb,lwork,info;
459: PetscScalar *tau,*work,*A=NULL,*R1=NULL,*R2=NULL;
460: PetscMPIInt size,count;
461: MPI_Datatype tmat;
464: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
465: PetscBLASIntCast(m_,&m);
466: PetscBLASIntCast(n_,&n);
467: nb = 16;
468: s = n+n*(n-1)/2; /* length of packed triangular matrix */
469: MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
470: worklen = n+n*nb+2*s+m*n;
471: BVAllocateWork_Private(bv,worklen);
472: tau = bv->work;
473: work = bv->work+n;
474: R1 = bv->work+n+n*nb;
475: R2 = bv->work+n+n*nb+s;
476: A = bv->work+n+n*nb+2*s;
477: PetscBLASIntCast(n*nb,&lwork);
478: PetscArraycpy(A,Q,m*n);
480: /* Compute QR */
481: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,A,&m,tau,work,&lwork,&info));
482: SlepcCheckLapackInfo("geqrf",info);
484: if (size==1) {
485: /* Extract R */
486: for (j=0;j<n;j++) {
487: for (i=0;i<=PetscMin(j,m-1);i++) R[i+j*ldr] = A[i+j*m];
488: for (i=PetscMin(j,m-1)+1;i<n;i++) R[i+j*ldr] = 0.0;
489: }
490: } else {
491: /* Use MPI reduction operation to obtain global R */
492: PetscMPIIntCast(s,&count);
493: MPI_Type_contiguous(count,MPIU_SCALAR,&tmat);
494: MPI_Type_commit(&tmat);
495: for (i=0;i<n;i++) {
496: for (j=i;j<n;j++) R1[(2*n-i-1)*i/2+j] = (i<m)?A[i+j*m]:0.0;
497: }
498: #if defined(PETSC_HAVE_I_MPI_NUMVERSION)
499: MPI_Reduce(R1,R2,1,tmat,MPIU_TSQR,0,PetscObjectComm((PetscObject)bv));
500: MPI_Bcast(R2,1,tmat,0,PetscObjectComm((PetscObject)bv));
501: #else
502: MPI_Allreduce(R1,R2,1,tmat,MPIU_TSQR,PetscObjectComm((PetscObject)bv));
503: #endif
504: for (i=0;i<n;i++) {
505: for (j=0;j<i;j++) R[i+j*ldr] = 0.0;
506: for (j=i;j<n;j++) R[i+j*ldr] = R2[(2*n-i-1)*i/2+j];
507: }
508: MPI_Type_free(&tmat);
509: }
511: PetscLogFlops(3.0*m*n*n);
512: PetscFPTrapPop();
513: return(0);
514: }