Actual source code: dt.c

petsc-3.4.2 2013-07-02
  1: /* Discretization tools */

  3: #include <petscconf.h>
  4: #if defined(PETSC_HAVE_MATHIMF_H)
  5: #include <mathimf.h>           /* this needs to be included before math.h */
  6: #endif

  8: #include <petscdt.h>            /*I "petscdt.h" I*/
  9: #include <petscblaslapack.h>
 10: #include <petsc-private/petscimpl.h>
 11: #include <petscviewer.h>

 15: /*@
 16:    PetscDTLegendreEval - evaluate Legendre polynomial at points

 18:    Not Collective

 20:    Input Arguments:
 21: +  npoints - number of spatial points to evaluate at
 22: .  points - array of locations to evaluate at
 23: .  ndegree - number of basis degrees to evaluate
 24: -  degrees - sorted array of degrees to evaluate

 26:    Output Arguments:
 27: +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
 28: .  D - row-oriented derivative evaluation matrix (or NULL)
 29: -  D2 - row-oriented second derivative evaluation matrix (or NULL)

 31:    Level: intermediate

 33: .seealso: PetscDTGaussQuadrature()
 34: @*/
 35: PetscErrorCode PetscDTLegendreEval(PetscInt npoints,const PetscReal *points,PetscInt ndegree,const PetscInt *degrees,PetscReal *B,PetscReal *D,PetscReal *D2)
 36: {
 37:   PetscInt i,maxdegree;

 40:   if (!npoints || !ndegree) return(0);
 41:   maxdegree = degrees[ndegree-1];
 42:   for (i=0; i<npoints; i++) {
 43:     PetscReal pm1,pm2,pd1,pd2,pdd1,pdd2,x;
 44:     PetscInt  j,k;
 45:     x    = points[i];
 46:     pm2  = 0;
 47:     pm1  = 1;
 48:     pd2  = 0;
 49:     pd1  = 0;
 50:     pdd2 = 0;
 51:     pdd1 = 0;
 52:     k    = 0;
 53:     if (degrees[k] == 0) {
 54:       if (B) B[i*ndegree+k] = pm1;
 55:       if (D) D[i*ndegree+k] = pd1;
 56:       if (D2) D2[i*ndegree+k] = pdd1;
 57:       k++;
 58:     }
 59:     for (j=1; j<=maxdegree; j++,k++) {
 60:       PetscReal p,d,dd;
 61:       p    = ((2*j-1)*x*pm1 - (j-1)*pm2)/j;
 62:       d    = pd2 + (2*j-1)*pm1;
 63:       dd   = pdd2 + (2*j-1)*pd1;
 64:       pm2  = pm1;
 65:       pm1  = p;
 66:       pd2  = pd1;
 67:       pd1  = d;
 68:       pdd2 = pdd1;
 69:       pdd1 = dd;
 70:       if (degrees[k] == j) {
 71:         if (B) B[i*ndegree+k] = p;
 72:         if (D) D[i*ndegree+k] = d;
 73:         if (D2) D2[i*ndegree+k] = dd;
 74:       }
 75:     }
 76:   }
 77:   return(0);
 78: }

 82: /*@
 83:    PetscDTGaussQuadrature - create Gauss quadrature

 85:    Not Collective

 87:    Input Arguments:
 88: +  npoints - number of points
 89: .  a - left end of interval (often-1)
 90: -  b - right end of interval (often +1)

 92:    Output Arguments:
 93: +  x - quadrature points
 94: -  w - quadrature weights

 96:    Level: intermediate

 98:    References:
 99:    Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 221--230, 1969.

101: .seealso: PetscDTLegendreEval()
102: @*/
103: PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints,PetscReal a,PetscReal b,PetscReal *x,PetscReal *w)
104: {
106:   PetscInt       i;
107:   PetscReal      *work;
108:   PetscScalar    *Z;
109:   PetscBLASInt   N,LDZ,info;

112:   /* Set up the Golub-Welsch system */
113:   for (i=0; i<npoints; i++) {
114:     x[i] = 0;                   /* diagonal is 0 */
115:     if (i) w[i-1] = 0.5 / PetscSqrtReal(1 - 1./PetscSqr(2*i));
116:   }
117:   PetscRealView(npoints-1,w,PETSC_VIEWER_STDOUT_SELF);
118:   PetscMalloc2(npoints*npoints,PetscScalar,&Z,PetscMax(1,2*npoints-2),PetscReal,&work);
119:   PetscBLASIntCast(npoints,&N);
120:   LDZ  = N;
121:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
122:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("I",&N,x,w,Z,&LDZ,work,&info));
123:   PetscFPTrapPop();
124:   if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"xSTEQR error");

126:   for (i=0; i<(npoints+1)/2; i++) {
127:     PetscReal y = 0.5 * (-x[i] + x[npoints-i-1]); /* enforces symmetry */
128:     x[i]           = (a+b)/2 - y*(b-a)/2;
129:     x[npoints-i-1] = (a+b)/2 + y*(b-a)/2;

131:     w[i] = w[npoints-1-i] = (b-a)*PetscSqr(0.5*PetscAbsScalar(Z[i*npoints] + Z[(npoints-i-1)*npoints]));
132:   }
133:   PetscFree2(Z,work);
134:   return(0);
135: }

139: /* Evaluates the nth jacobi polynomial with weight parameters a,b at a point x.
140:    Recurrence relations implemented from the pseudocode given in Karniadakis and Sherwin, Appendix B */
141: PETSC_STATIC_INLINE PetscErrorCode PetscDTFactorial_Internal(PetscInt n, PetscReal *factorial)
142: {
143:   PetscReal f = 1.0;
144:   PetscInt  i;

147:   for (i = 1; i < n+1; ++i) f *= i;
148:   *factorial = f;
149:   return(0);
150: }

154: /* Evaluates the nth jacobi polynomial with weight parameters a,b at a point x.
155:    Recurrence relations implemented from the pseudocode given in Karniadakis and Sherwin, Appendix B */
156: PETSC_STATIC_INLINE PetscErrorCode PetscDTComputeJacobi(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscReal *P)
157: {
158:   PetscReal apb, pn1, pn2;
159:   PetscInt  k;

162:   if (!n) {*P = 1.0; return(0);}
163:   if (n == 1) {*P = 0.5 * (a - b + (a + b + 2.0) * x); return(0);}
164:   apb = a + b;
165:   pn2 = 1.0;
166:   pn1 = 0.5 * (a - b + (apb + 2.0) * x);
167:   *P  = 0.0;
168:   for (k = 2; k < n+1; ++k) {
169:     PetscReal a1 = 2.0 * k * (k + apb) * (2.0*k + apb - 2.0);
170:     PetscReal a2 = (2.0 * k + apb - 1.0) * (a*a - b*b);
171:     PetscReal a3 = (2.0 * k + apb - 2.0) * (2.0 * k + apb - 1.0) * (2.0 * k + apb);
172:     PetscReal a4 = 2.0 * (k + a - 1.0) * (k + b - 1.0) * (2.0 * k + apb);

174:     a2  = a2 / a1;
175:     a3  = a3 / a1;
176:     a4  = a4 / a1;
177:     *P  = (a2 + a3 * x) * pn1 - a4 * pn2;
178:     pn2 = pn1;
179:     pn1 = *P;
180:   }
181:   return(0);
182: }

186: /* Evaluates the first derivative of P_{n}^{a,b} at a point x. */
187: PETSC_STATIC_INLINE PetscErrorCode PetscDTComputeJacobiDerivative(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscReal *P)
188: {
189:   PetscReal      nP;

193:   if (!n) {*P = 0.0; return(0);}
194:   PetscDTComputeJacobi(a+1, b+1, n-1, x, &nP);
195:   *P   = 0.5 * (a + b + n + 1) * nP;
196:   return(0);
197: }

201: /* Maps from [-1,1]^2 to the (-1,1) reference triangle */
202: PETSC_STATIC_INLINE PetscErrorCode PetscDTMapSquareToTriangle_Internal(PetscReal x, PetscReal y, PetscReal *xi, PetscReal *eta)
203: {
205:   *xi  = 0.5 * (1.0 + x) * (1.0 - y) - 1.0;
206:   *eta = y;
207:   return(0);
208: }

212: /* Maps from [-1,1]^2 to the (-1,1) reference triangle */
213: PETSC_STATIC_INLINE PetscErrorCode PetscDTMapCubeToTetrahedron_Internal(PetscReal x, PetscReal y, PetscReal z, PetscReal *xi, PetscReal *eta, PetscReal *zeta)
214: {
216:   *xi   = 0.25 * (1.0 + x) * (1.0 - y) * (1.0 - z) - 1.0;
217:   *eta  = 0.5  * (1.0 + y) * (1.0 - z) - 1.0;
218:   *zeta = z;
219:   return(0);
220: }

224: static PetscErrorCode PetscDTGaussJacobiQuadrature1D_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
225: {
226:   PetscInt       maxIter = 100;
227:   PetscReal      eps     = 1.0e-8;
228:   PetscReal      a1, a2, a3, a4, a5, a6;
229:   PetscInt       k;


234:   a1      = pow(2, a+b+1);
235: #if defined(PETSC_HAVE_TGAMMA)
236:   a2      = tgamma(a + npoints + 1);
237:   a3      = tgamma(b + npoints + 1);
238:   a4      = tgamma(a + b + npoints + 1);
239: #else
240:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"tgamma() - math routine is unavailable.");
241: #endif

243:   PetscDTFactorial_Internal(npoints, &a5);
244:   a6   = a1 * a2 * a3 / a4 / a5;
245:   /* Computes the m roots of P_{m}^{a,b} on [-1,1] by Newton's method with Chebyshev points as initial guesses.
246:    Algorithm implemented from the pseudocode given by Karniadakis and Sherwin and Python in FIAT */
247:   for (k = 0; k < npoints; ++k) {
248:     PetscReal r = -cos((2.0*k + 1.0) * PETSC_PI / (2.0 * npoints)), dP;
249:     PetscInt  j;

251:     if (k > 0) r = 0.5 * (r + x[k-1]);
252:     for (j = 0; j < maxIter; ++j) {
253:       PetscReal s = 0.0, delta, f, fp;
254:       PetscInt  i;

256:       for (i = 0; i < k; ++i) s = s + 1.0 / (r - x[i]);
257:       PetscDTComputeJacobi(a, b, npoints, r, &f);
258:       PetscDTComputeJacobiDerivative(a, b, npoints, r, &fp);
259:       delta = f / (fp - f * s);
260:       r     = r - delta;
261:       if (fabs(delta) < eps) break;
262:     }
263:     x[k] = r;
264:     PetscDTComputeJacobiDerivative(a, b, npoints, x[k], &dP);
265:     w[k] = a6 / (1.0 - PetscSqr(x[k])) / PetscSqr(dP);
266:   }
267:   return(0);
268: }

272: /*@C
273:   PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

275:   Not Collective

277:   Input Arguments:
278: + dim - The simplex dimension
279: . npoints - number of points
280: . a - left end of interval (often-1)
281: - b - right end of interval (often +1)

283:   Output Arguments:
284: + points - quadrature points
285: - weights - quadrature weights

287:   Level: intermediate

289:   References:
290:   Karniadakis and Sherwin.
291:   FIAT

293: .seealso: PetscDTGaussQuadrature()
294: @*/
295: PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt npoints, PetscReal a, PetscReal b, PetscReal *points[], PetscReal *weights[])
296: {
297:   PetscReal     *px, *wx, *py, *wy, *pz, *wz, *x, *w;
298:   PetscInt       i, j, k;

302:   if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
303:   switch (dim) {
304:   case 1:
305:     PetscMalloc(npoints * sizeof(PetscReal), &x);
306:     PetscMalloc(npoints * sizeof(PetscReal), &w);
307:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, x, w);
308:     break;
309:   case 2:
310:     PetscMalloc(npoints*npoints*2 * sizeof(PetscReal), &x);
311:     PetscMalloc(npoints*npoints   * sizeof(PetscReal), &w);
312:     PetscMalloc4(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy);
313:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);
314:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);
315:     for (i = 0; i < npoints; ++i) {
316:       for (j = 0; j < npoints; ++j) {
317:         PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*npoints+j)*2+0], &x[(i*npoints+j)*2+1]);
318:         w[i*npoints+j] = 0.5 * wx[i] * wy[j];
319:       }
320:     }
321:     PetscFree4(px,wx,py,wy);
322:     break;
323:   case 3:
324:     PetscMalloc(npoints*npoints*3 * sizeof(PetscReal), &x);
325:     PetscMalloc(npoints*npoints   * sizeof(PetscReal), &w);
326:     PetscMalloc6(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy,npoints,PetscReal,&pz,npoints,PetscReal,&wz);
327:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);
328:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);
329:     PetscDTGaussJacobiQuadrature1D_Internal(npoints, 2.0, 0.0, pz, wz);
330:     for (i = 0; i < npoints; ++i) {
331:       for (j = 0; j < npoints; ++j) {
332:         for (k = 0; k < npoints; ++k) {
333:           PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*npoints+j)*npoints+k)*3+0], &x[((i*npoints+j)*npoints+k)*3+1], &x[((i*npoints+j)*npoints+k)*3+2]);
334:           w[(i*npoints+j)*npoints+k] = 0.125 * wx[i] * wy[j] * wz[k];
335:         }
336:       }
337:     }
338:     PetscFree6(px,wx,py,wy,pz,wz);
339:     break;
340:   default:
341:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
342:   }
343:   if (points)  *points  = x;
344:   if (weights) *weights = w;
345:   return(0);
346: }

350: /* Overwrites A. Can only handle full-rank problems with m>=n
351:  * A in column-major format
352:  * Ainv in row-major format
353:  * tau has length m
354:  * worksize must be >= max(1,n)
355:  */
356: static PetscErrorCode PetscDTPseudoInverseQR(PetscInt m,PetscInt mstride,PetscInt n,PetscReal *A_in,PetscReal *Ainv_out,PetscScalar *tau,PetscInt worksize,PetscScalar *work)
357: {
359:   PetscBLASInt M,N,K,lda,ldb,ldwork,info;
360:   PetscScalar *A,*Ainv,*R,*Q,Alpha;

363: #if defined(PETSC_USE_COMPLEX)
364:   {
365:     PetscInt i,j;
366:     PetscMalloc2(m*n,PetscScalar,&A,m*n,PetscScalar,&Ainv);
367:     for (j=0; j<n; j++) {
368:       for (i=0; i<m; i++) A[i+m*j] = A_in[i+mstride*j];
369:     }
370:     mstride = m;
371:   }
372: #else
373:   A = A_in;
374:   Ainv = Ainv_out;
375: #endif

377:   PetscBLASIntCast(m,&M);
378:   PetscBLASIntCast(n,&N);
379:   PetscBLASIntCast(mstride,&lda);
380:   PetscBLASIntCast(worksize,&ldwork);
381:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
382:   LAPACKgeqrf_(&M,&N,A,&lda,tau,work,&ldwork,&info);
383:   PetscFPTrapPop();
384:   if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRF error");
385:   R = A; /* Upper triangular part of A now contains R, the rest contains the elementary reflectors */

387:   /* Extract an explicit representation of Q */
388:   Q = Ainv;
389:   PetscMemcpy(Q,A,mstride*n*sizeof(PetscScalar));
390:   K = N;                        /* full rank */
391:   LAPACKungqr_(&M,&N,&K,Q,&lda,tau,work,&ldwork,&info);
392:   if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR/xUNGQR error");

394:   /* Compute A^{-T} = (R^{-1} Q^T)^T = Q R^{-T} */
395:   Alpha = 1.0;
396:   ldb = lda;
397:   BLAStrsm_("Right","Upper","ConjugateTranspose","NotUnitTriangular",&M,&N,&Alpha,R,&lda,Q,&ldb);
398:   /* Ainv is Q, overwritten with inverse */

400: #if defined(PETSC_USE_COMPLEX)
401:   {
402:     PetscInt i;
403:     for (i=0; i<m*n; i++) Ainv_out[i] = PetscRealPart(Ainv[i]);
404:     PetscFree2(A,Ainv);
405:   }
406: #endif
407:   return(0);
408: }

412: /* Computes integral of L_p' over intervals {(x0,x1),(x1,x2),...} */
413: static PetscErrorCode PetscDTLegendreIntegrate(PetscInt ninterval,const PetscReal *x,PetscInt ndegree,const PetscInt *degrees,PetscBool Transpose,PetscReal *B)
414: {
416:   PetscReal *Bv;
417:   PetscInt i,j;

420:   PetscMalloc((ninterval+1)*ndegree*sizeof(PetscReal),&Bv);
421:   /* Point evaluation of L_p on all the source vertices */
422:   PetscDTLegendreEval(ninterval+1,x,ndegree,degrees,Bv,NULL,NULL);
423:   /* Integral over each interval: \int_a^b L_p' = L_p(b)-L_p(a) */
424:   for (i=0; i<ninterval; i++) {
425:     for (j=0; j<ndegree; j++) {
426:       if (Transpose) B[i+ninterval*j] = Bv[(i+1)*ndegree+j] - Bv[i*ndegree+j];
427:       else           B[i*ndegree+j]   = Bv[(i+1)*ndegree+j] - Bv[i*ndegree+j];
428:     }
429:   }
430:   PetscFree(Bv);
431:   return(0);
432: }

436: /*@
437:    PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals

439:    Not Collective

441:    Input Arguments:
442: +  degree - degree of reconstruction polynomial
443: .  nsource - number of source intervals
444: .  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
445: .  ntarget - number of target intervals
446: -  targetx - sorted coordinates of target cell boundaries (length ntarget+1)

448:    Output Arguments:
449: .  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]

451:    Level: advanced

453: .seealso: PetscDTLegendreEval()
454: @*/
455: PetscErrorCode PetscDTReconstructPoly(PetscInt degree,PetscInt nsource,const PetscReal *sourcex,PetscInt ntarget,const PetscReal *targetx,PetscReal *R)
456: {
458:   PetscInt i,j,k,*bdegrees,worksize;
459:   PetscReal xmin,xmax,center,hscale,*sourcey,*targety,*Bsource,*Bsinv,*Btarget;
460:   PetscScalar *tau,*work;

466:   if (degree >= nsource) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Reconstruction degree %D must be less than number of source intervals %D",degree,nsource);
467: #if defined(PETSC_USE_DEBUG)
468:   for (i=0; i<nsource; i++) {
469:     if (sourcex[i] >= sourcex[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Source interval %D has negative orientation (%G,%G)",i,sourcex[i],sourcex[i+1]);
470:   }
471:   for (i=0; i<ntarget; i++) {
472:     if (targetx[i] >= targetx[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Target interval %D has negative orientation (%G,%G)",i,targetx[i],targetx[i+1]);
473:   }
474: #endif
475:   xmin = PetscMin(sourcex[0],targetx[0]);
476:   xmax = PetscMax(sourcex[nsource],targetx[ntarget]);
477:   center = (xmin + xmax)/2;
478:   hscale = (xmax - xmin)/2;
479:   worksize = nsource;
480:   PetscMalloc4(degree+1,PetscInt,&bdegrees,nsource+1,PetscReal,&sourcey,nsource*(degree+1),PetscReal,&Bsource,worksize,PetscScalar,&work);
481:   PetscMalloc4(nsource,PetscScalar,&tau,nsource*(degree+1),PetscReal,&Bsinv,ntarget+1,PetscReal,&targety,ntarget*(degree+1),PetscReal,&Btarget);
482:   for (i=0; i<=nsource; i++) sourcey[i] = (sourcex[i]-center)/hscale;
483:   for (i=0; i<=degree; i++) bdegrees[i] = i+1;
484:   PetscDTLegendreIntegrate(nsource,sourcey,degree+1,bdegrees,PETSC_TRUE,Bsource);
485:   PetscDTPseudoInverseQR(nsource,nsource,degree+1,Bsource,Bsinv,tau,nsource,work);
486:   for (i=0; i<=ntarget; i++) targety[i] = (targetx[i]-center)/hscale;
487:   PetscDTLegendreIntegrate(ntarget,targety,degree+1,bdegrees,PETSC_FALSE,Btarget);
488:   for (i=0; i<ntarget; i++) {
489:     PetscReal rowsum = 0;
490:     for (j=0; j<nsource; j++) {
491:       PetscReal sum = 0;
492:       for (k=0; k<degree+1; k++) {
493:         sum += Btarget[i*(degree+1)+k] * Bsinv[k*nsource+j];
494:       }
495:       R[i*nsource+j] = sum;
496:       rowsum += sum;
497:     }
498:     for (j=0; j<nsource; j++) R[i*nsource+j] /= rowsum; /* normalize each row */
499:   }
500:   PetscFree4(bdegrees,sourcey,Bsource,work);
501:   PetscFree4(tau,Bsinv,targety,Btarget);
502:   return(0);
503: }