Actual source code: chwirut2.c

petsc-3.9.0 2018-04-07
Report Typos and Errors
  1: /*
  2:    Include "petsctao.h" so that we can use TAO solvers.  Note that this
  3:    file automatically includes libraries such as:
  4:      petsc.h       - base PETSc routines   petscvec.h - vectors
  5:      petscsys.h    - system routines       petscmat.h - matrices
  6:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
  7:      petscviewer.h - viewers               petscpc.h  - preconditioners

  9: */

 11: /*T
 12:    requires: !single
 13: T*/

 15:  #include <petsctao.h>
 16: #include <mpi.h>


 19: /*
 20: Description:   These data are the result of a NIST study involving
 21:                ultrasonic calibration.  The response variable is
 22:                ultrasonic response, and the predictor variable is
 23:                metal distance.

 25: Reference:     Chwirut, D., NIST (197?).
 26:                Ultrasonic Reference Block Study.
 27: */

 29: static char help[]="Finds the nonlinear least-squares solution to the model \n\
 30:             y = exp[-b1*x]/(b2+b3*x)  +  e \n";

 32: /* T
 33:    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
 34:    Routines: TaoCreate();
 35:    Routines: TaoSetType();
 36:    Routines: TaoSetSeparableObjectiveRoutine();
 37:    Routines: TaoSetMonitor();
 38:    Routines: TaoSetInitialVector();
 39:    Routines: TaoSetFromOptions();
 40:    Routines: TaoSolve();
 41:    Routines: TaoDestroy();
 42:    Processors: n
 43: T*/

 45: #define NOBSERVATIONS 214
 46: #define NPARAMETERS 3

 48: #define DIE_TAG 2000
 49: #define IDLE_TAG 1000

 51: /* User-defined application context */
 52: typedef struct {
 53:   /* Working space */
 54:   PetscReal   t[NOBSERVATIONS];   /* array of independent variables of observation */
 55:   PetscReal   y[NOBSERVATIONS];   /* array of dependent variables */
 56:   PetscMPIInt size,rank;
 57: } AppCtx;

 59: /* User provided Routines */
 60: PetscErrorCode InitializeData(AppCtx *user);
 61: PetscErrorCode FormStartingPoint(Vec);
 62: PetscErrorCode EvaluateFunction(Tao, Vec, Vec, void *);
 63: PetscErrorCode TaskWorker(AppCtx *user);
 64: PetscErrorCode StopWorkers(AppCtx *user);
 65: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user);

 67: /*--------------------------------------------------------------------*/
 68: int main(int argc,char **argv)
 69: {
 71:   Vec            x, f;               /* solution, function */
 72:   Tao            tao;                /* Tao solver context */
 73:   AppCtx         user;               /* user-defined work context */

 75:    /* Initialize TAO and PETSc */
 76:   PetscInitialize(&argc,&argv,(char *)0,help);
 77:   MPI_Comm_size(MPI_COMM_WORLD,&user.size);
 78:   MPI_Comm_rank(MPI_COMM_WORLD,&user.rank);
 79:   InitializeData(&user);

 81:   /* Run optimization on rank 0 */
 82:   if (user.rank == 0) {
 83:     /* Allocate vectors */
 84:     VecCreateSeq(PETSC_COMM_SELF,NPARAMETERS,&x);
 85:     VecCreateSeq(PETSC_COMM_SELF,NOBSERVATIONS,&f);

 87:     /* TAO code begins here */

 89:     /* Create TAO solver and set desired solution method */
 90:     TaoCreate(PETSC_COMM_SELF,&tao);
 91:     TaoSetType(tao,TAOPOUNDERS);

 93:     /* Set the function and Jacobian routines. */
 94:     FormStartingPoint(x);
 95:     TaoSetInitialVector(tao,x);
 96:     TaoSetSeparableObjectiveRoutine(tao,f,EvaluateFunction,(void*)&user);

 98:     /* Check for any TAO command line arguments */
 99:     TaoSetFromOptions(tao);

101:     /* Perform the Solve */
102:     TaoSolve(tao);

104:     /* Free TAO data structures */
105:     TaoDestroy(&tao);

107:     /* Free PETSc data structures */
108:     VecDestroy(&x);
109:     VecDestroy(&f);
110:     StopWorkers(&user);
111:   } else {
112:     TaskWorker(&user);
113:   }
114:   PetscFinalize();
115:   return ierr;
116: }

118: /*--------------------------------------------------------------------*/
119: PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr)
120: {
121:   AppCtx         *user = (AppCtx *)ptr;
122:   PetscInt       i;
123:   PetscReal      *x,*f;

127:   VecGetArray(X,&x);
128:   VecGetArray(F,&f);
129:   if (user->size == 1) {
130:     /* Single processor */
131:     for (i=0;i<NOBSERVATIONS;i++) {
132:       RunSimulation(x,i,&f[i],user);
133:     }
134:   } else {
135:     /* Multiprocessor master */
136:     PetscMPIInt tag;
137:     PetscInt    finishedtasks,next_task,checkedin;
138:     PetscReal   f_i=0.0;
139:     MPI_Status  status;

141:     next_task=0;
142:     finishedtasks=0;
143:     checkedin=0;

145:     while(finishedtasks < NOBSERVATIONS || checkedin < user->size-1) {
146:       MPI_Recv(&f_i,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
147:       if (status.MPI_TAG == IDLE_TAG) {
148:         checkedin++;
149:       } else {

151:         tag = status.MPI_TAG;
152:         f[tag] = (PetscReal)f_i;
153:         finishedtasks++;
154:       }

156:       if (next_task<NOBSERVATIONS) {
157:         MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,next_task,PETSC_COMM_WORLD);
158:         next_task++;

160:       } else {
161:         /* Send idle message */
162:         MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,IDLE_TAG,PETSC_COMM_WORLD);
163:       }
164:     }
165:   }
166:   VecRestoreArray(X,&x);
167:   VecRestoreArray(F,&f);
168:   PetscLogFlops(6*NOBSERVATIONS);
169:   return(0);
170: }

172: /* ------------------------------------------------------------ */
173: PetscErrorCode FormStartingPoint(Vec X)
174: {
175:   PetscReal      *x;

179:   VecGetArray(X,&x);
180:   x[0] = 0.15;
181:   x[1] = 0.008;
182:   x[2] = 0.010;
183:   VecRestoreArray(X,&x);
184:   return(0);
185: }

187: /* ---------------------------------------------------------------------- */
188: PetscErrorCode InitializeData(AppCtx *user)
189: {
190:   PetscReal *t=user->t,*y=user->y;
191:   PetscInt  i=0;

194:   y[i] =   92.9000;   t[i++] =  0.5000;
195:   y[i] =    78.7000;  t[i++] =   0.6250;
196:   y[i] =    64.2000;  t[i++] =   0.7500;
197:   y[i] =    64.9000;  t[i++] =   0.8750;
198:   y[i] =    57.1000;  t[i++] =   1.0000;
199:   y[i] =    43.3000;  t[i++] =   1.2500;
200:   y[i] =    31.1000;   t[i++] =  1.7500;
201:   y[i] =    23.6000;   t[i++] =  2.2500;
202:   y[i] =    31.0500;   t[i++] =  1.7500;
203:   y[i] =    23.7750;   t[i++] =  2.2500;
204:   y[i] =    17.7375;   t[i++] =  2.7500;
205:   y[i] =    13.8000;   t[i++] =  3.2500;
206:   y[i] =    11.5875;   t[i++] =  3.7500;
207:   y[i] =     9.4125;   t[i++] =  4.2500;
208:   y[i] =     7.7250;   t[i++] =  4.7500;
209:   y[i] =     7.3500;   t[i++] =  5.2500;
210:   y[i] =     8.0250;   t[i++] =  5.7500;
211:   y[i] =    90.6000;   t[i++] =  0.5000;
212:   y[i] =    76.9000;   t[i++] =  0.6250;
213:   y[i] =    71.6000;   t[i++] = 0.7500;
214:   y[i] =    63.6000;   t[i++] =  0.8750;
215:   y[i] =    54.0000;   t[i++] =  1.0000;
216:   y[i] =    39.2000;   t[i++] =  1.2500;
217:   y[i] =    29.3000;   t[i++] = 1.7500;
218:   y[i] =    21.4000;   t[i++] =  2.2500;
219:   y[i] =    29.1750;   t[i++] =  1.7500;
220:   y[i] =    22.1250;   t[i++] =  2.2500;
221:   y[i] =    17.5125;   t[i++] =  2.7500;
222:   y[i] =    14.2500;   t[i++] =  3.2500;
223:   y[i] =     9.4500;   t[i++] =  3.7500;
224:   y[i] =     9.1500;   t[i++] =  4.2500;
225:   y[i] =     7.9125;   t[i++] =  4.7500;
226:   y[i] =     8.4750;   t[i++] =  5.2500;
227:   y[i] =     6.1125;   t[i++] =  5.7500;
228:   y[i] =    80.0000;   t[i++] =  0.5000;
229:   y[i] =    79.0000;   t[i++] =  0.6250;
230:   y[i] =    63.8000;   t[i++] =  0.7500;
231:   y[i] =    57.2000;   t[i++] =  0.8750;
232:   y[i] =    53.2000;   t[i++] =  1.0000;
233:   y[i] =   42.5000;   t[i++] =  1.2500;
234:   y[i] =   26.8000;   t[i++] =  1.7500;
235:   y[i] =    20.4000;   t[i++] =  2.2500;
236:   y[i] =    26.8500;  t[i++] =   1.7500;
237:   y[i] =    21.0000;  t[i++] =   2.2500;
238:   y[i] =    16.4625;  t[i++] =   2.7500;
239:   y[i] =    12.5250;  t[i++] =   3.2500;
240:   y[i] =    10.5375;  t[i++] =   3.7500;
241:   y[i] =     8.5875;  t[i++] =   4.2500;
242:   y[i] =     7.1250;  t[i++] =   4.7500;
243:   y[i] =     6.1125;  t[i++] =   5.2500;
244:   y[i] =     5.9625;  t[i++] =   5.7500;
245:   y[i] =    74.1000;  t[i++] =   0.5000;
246:   y[i] =    67.3000;  t[i++] =   0.6250;
247:   y[i] =    60.8000;  t[i++] =   0.7500;
248:   y[i] =    55.5000;  t[i++] =   0.8750;
249:   y[i] =    50.3000;  t[i++] =   1.0000;
250:   y[i] =    41.0000;  t[i++] =   1.2500;
251:   y[i] =    29.4000;  t[i++] =   1.7500;
252:   y[i] =    20.4000;  t[i++] =   2.2500;
253:   y[i] =    29.3625;  t[i++] =   1.7500;
254:   y[i] =    21.1500;  t[i++] =   2.2500;
255:   y[i] =    16.7625;  t[i++] =   2.7500;
256:   y[i] =    13.2000;  t[i++] =   3.2500;
257:   y[i] =    10.8750;  t[i++] =   3.7500;
258:   y[i] =     8.1750;  t[i++] =   4.2500;
259:   y[i] =     7.3500;  t[i++] =   4.7500;
260:   y[i] =     5.9625;  t[i++] =  5.2500;
261:   y[i] =     5.6250;  t[i++] =   5.7500;
262:   y[i] =    81.5000;  t[i++] =    .5000;
263:   y[i] =    62.4000;  t[i++] =    .7500;
264:   y[i] =    32.5000;  t[i++] =   1.5000;
265:   y[i] =    12.4100;  t[i++] =   3.0000;
266:   y[i] =    13.1200;  t[i++] =   3.0000;
267:   y[i] =    15.5600;  t[i++] =   3.0000;
268:   y[i] =     5.6300;  t[i++] =   6.0000;
269:   y[i] =    78.0000;   t[i++] =   .5000;
270:   y[i] =    59.9000;  t[i++] =    .7500;
271:   y[i] =    33.2000;  t[i++] =   1.5000;
272:   y[i] =    13.8400;  t[i++] =   3.0000;
273:   y[i] =    12.7500;  t[i++] =   3.0000;
274:   y[i] =    14.6200;  t[i++] =   3.0000;
275:   y[i] =     3.9400;  t[i++] =   6.0000;
276:   y[i] =    76.8000;  t[i++] =    .5000;
277:   y[i] =    61.0000;  t[i++] =    .7500;
278:   y[i] =    32.9000;  t[i++] =   1.5000;
279:   y[i] =   13.8700;   t[i++] = 3.0000;
280:   y[i] =    11.8100;  t[i++] =   3.0000;
281:   y[i] =    13.3100;  t[i++] =   3.0000;
282:   y[i] =     5.4400;  t[i++] =   6.0000;
283:   y[i] =    78.0000;  t[i++] =    .5000;
284:   y[i] =    63.5000;  t[i++] =    .7500;
285:   y[i] =    33.8000;  t[i++] =   1.5000;
286:   y[i] =    12.5600;  t[i++] =   3.0000;
287:   y[i] =     5.6300;  t[i++] =   6.0000;
288:   y[i] =    12.7500;  t[i++] =   3.0000;
289:   y[i] =    13.1200;  t[i++] =   3.0000;
290:   y[i] =     5.4400;  t[i++] =   6.0000;
291:   y[i] =    76.8000;  t[i++] =    .5000;
292:   y[i] =    60.0000;  t[i++] =    .7500;
293:   y[i] =    47.8000;  t[i++] =   1.0000;
294:   y[i] =    32.0000;  t[i++] =   1.5000;
295:   y[i] =    22.2000;  t[i++] =   2.0000;
296:   y[i] =    22.5700;  t[i++] =   2.0000;
297:   y[i] =    18.8200;  t[i++] =   2.5000;
298:   y[i] =    13.9500;  t[i++] =   3.0000;
299:   y[i] =    11.2500;  t[i++] =   4.0000;
300:   y[i] =     9.0000;  t[i++] =   5.0000;
301:   y[i] =     6.6700;  t[i++] =   6.0000;
302:   y[i] =    75.8000;  t[i++] =    .5000;
303:   y[i] =    62.0000;  t[i++] =    .7500;
304:   y[i] =    48.8000;  t[i++] =   1.0000;
305:   y[i] =    35.2000;  t[i++] =   1.5000;
306:   y[i] =    20.0000;  t[i++] =   2.0000;
307:   y[i] =    20.3200;  t[i++] =   2.0000;
308:   y[i] =    19.3100;  t[i++] =   2.5000;
309:   y[i] =    12.7500;  t[i++] =   3.0000;
310:   y[i] =    10.4200;  t[i++] =   4.0000;
311:   y[i] =     7.3100;  t[i++] =   5.0000;
312:   y[i] =     7.4200;  t[i++] =   6.0000;
313:   y[i] =    70.5000;  t[i++] =    .5000;
314:   y[i] =    59.5000;  t[i++] =    .7500;
315:   y[i] =    48.5000;  t[i++] =   1.0000;
316:   y[i] =    35.8000;  t[i++] =   1.5000;
317:   y[i] =    21.0000;  t[i++] =   2.0000;
318:   y[i] =    21.6700;  t[i++] =   2.0000;
319:   y[i] =    21.0000;  t[i++] =   2.5000;
320:   y[i] =    15.6400;  t[i++] =   3.0000;
321:   y[i] =     8.1700;  t[i++] =   4.0000;
322:   y[i] =     8.5500;  t[i++] =   5.0000;
323:   y[i] =    10.1200;  t[i++] =   6.0000;
324:   y[i] =    78.0000;  t[i++] =    .5000;
325:   y[i] =    66.0000;  t[i++] =    .6250;
326:   y[i] =    62.0000;  t[i++] =    .7500;
327:   y[i] =    58.0000;  t[i++] =    .8750;
328:   y[i] =    47.7000;  t[i++] =   1.0000;
329:   y[i] =    37.8000;  t[i++] =   1.2500;
330:   y[i] =    20.2000;  t[i++] =   2.2500;
331:   y[i] =    21.0700;  t[i++] =   2.2500;
332:   y[i] =    13.8700;  t[i++] =   2.7500;
333:   y[i] =     9.6700;  t[i++] =   3.2500;
334:   y[i] =     7.7600;  t[i++] =   3.7500;
335:   y[i] =    5.4400;   t[i++] =  4.2500;
336:   y[i] =    4.8700;   t[i++] =  4.7500;
337:   y[i] =     4.0100;  t[i++] =   5.2500;
338:   y[i] =     3.7500;  t[i++] =   5.7500;
339:   y[i] =    24.1900;  t[i++] =   3.0000;
340:   y[i] =    25.7600;  t[i++] =   3.0000;
341:   y[i] =    18.0700;  t[i++] =   3.0000;
342:   y[i] =    11.8100;  t[i++] =   3.0000;
343:   y[i] =    12.0700;  t[i++] =   3.0000;
344:   y[i] =    16.1200;  t[i++] =   3.0000;
345:   y[i] =    70.8000;  t[i++] =    .5000;
346:   y[i] =    54.7000;  t[i++] =    .7500;
347:   y[i] =    48.0000;  t[i++] =   1.0000;
348:   y[i] =    39.8000;  t[i++] =   1.5000;
349:   y[i] =    29.8000;  t[i++] =   2.0000;
350:   y[i] =    23.7000;  t[i++] =   2.5000;
351:   y[i] =    29.6200;  t[i++] =   2.0000;
352:   y[i] =    23.8100;  t[i++] =   2.5000;
353:   y[i] =    17.7000;  t[i++] =   3.0000;
354:   y[i] =    11.5500;  t[i++] =   4.0000;
355:   y[i] =    12.0700;  t[i++] =   5.0000;
356:   y[i] =     8.7400;  t[i++] =   6.0000;
357:   y[i] =    80.7000;  t[i++] =    .5000;
358:   y[i] =    61.3000;  t[i++] =    .7500;
359:   y[i] =    47.5000;  t[i++] =   1.0000;
360:    y[i] =   29.0000;  t[i++] =   1.5000;
361:    y[i] =   24.0000;  t[i++] =   2.0000;
362:   y[i] =    17.7000;  t[i++] =   2.5000;
363:   y[i] =    24.5600;  t[i++] =   2.0000;
364:   y[i] =    18.6700;  t[i++] =   2.5000;
365:    y[i] =   16.2400;  t[i++] =   3.0000;
366:   y[i] =     8.7400;  t[i++] =   4.0000;
367:   y[i] =     7.8700;  t[i++] =   5.0000;
368:   y[i] =     8.5100;  t[i++] =   6.0000;
369:   y[i] =    66.7000;  t[i++] =    .5000;
370:   y[i] =    59.2000;  t[i++] =    .7500;
371:   y[i] =    40.8000;  t[i++] =   1.0000;
372:   y[i] =    30.7000;  t[i++] =   1.5000;
373:   y[i] =    25.7000;  t[i++] =   2.0000;
374:   y[i] =    16.3000;  t[i++] =   2.5000;
375:   y[i] =    25.9900;  t[i++] =   2.0000;
376:   y[i] =    16.9500;  t[i++] =   2.5000;
377:   y[i] =    13.3500;  t[i++] =   3.0000;
378:   y[i] =     8.6200;  t[i++] =   4.0000;
379:   y[i] =     7.2000;  t[i++] =   5.0000;
380:   y[i] =     6.6400;  t[i++] =   6.0000;
381:   y[i] =    13.6900;  t[i++] =   3.0000;
382:   y[i] =    81.0000;  t[i++] =    .5000;
383:   y[i] =    64.5000;  t[i++] =    .7500;
384:   y[i] =    35.5000;  t[i++] =   1.5000;
385:    y[i] =   13.3100;  t[i++] =   3.0000;
386:   y[i] =     4.8700;  t[i++] =   6.0000;
387:   y[i] =    12.9400;  t[i++] =   3.0000;
388:   y[i] =     5.0600;  t[i++] =   6.0000;
389:   y[i] =    15.1900;  t[i++] =   3.0000;
390:   y[i] =    14.6200;  t[i++] =   3.0000;
391:   y[i] =    15.6400;  t[i++] =   3.0000;
392:   y[i] =    25.5000;  t[i++] =   1.7500;
393:   y[i] =    25.9500;  t[i++] =   1.7500;
394:   y[i] =    81.7000;  t[i++] =    .5000;
395:   y[i] =    61.6000;  t[i++] =    .7500;
396:   y[i] =    29.8000;  t[i++] =   1.7500;
397:   y[i] =    29.8100;  t[i++] =   1.7500;
398:   y[i] =    17.1700;  t[i++] =   2.7500;
399:   y[i] =    10.3900;  t[i++] =   3.7500;
400:   y[i] =    28.4000;  t[i++] =   1.7500;
401:   y[i] =    28.6900;  t[i++] =   1.7500;
402:   y[i] =    81.3000;  t[i++] =    .5000;
403:   y[i] =    60.9000;  t[i++] =    .7500;
404:   y[i] =    16.6500;  t[i++] =   2.7500;
405:   y[i] =    10.0500;  t[i++] =   3.7500;
406:   y[i] =    28.9000;  t[i++] =   1.7500;
407:   y[i] =    28.9500;  t[i++] =   1.7500;
408:   return(0);
409: }

411: PetscErrorCode TaskWorker(AppCtx *user)
412: {
413:   PetscReal      x[NPARAMETERS],f = 0.0;
414:   PetscMPIInt    tag=IDLE_TAG;
415:   PetscInt       index;
416:   MPI_Status     status;

420:   /* Send check-in message to master */

422:   MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD);
423:   while (tag != DIE_TAG) {
424:     MPI_Recv(x,NPARAMETERS,MPIU_REAL,0,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
425:     tag = status.MPI_TAG;
426:     if (tag == IDLE_TAG) {
427:       MPI_Send(&f,1,MPIU_REAL,0,IDLE_TAG,PETSC_COMM_WORLD);
428:     } else if (tag != DIE_TAG) {
429:       index = (PetscInt)tag;
430:       ierr=RunSimulation(x,index,&f,user);
431:       ierr=MPI_Send(&f,1,MPIU_REAL,0,tag,PETSC_COMM_WORLD);
432:     }
433:   }
434:   return(0);
435: }

437: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal*f, AppCtx *user)
438: {
439:   PetscReal *t = user->t;
440:   PetscReal *y = user->y;
441:   *f = y[i] - PetscExpScalar(-x[0]*t[i])/(x[1] + x[2]*t[i]);
442:   return(0);
443: }

445: PetscErrorCode StopWorkers(AppCtx *user)
446: {
447:   PetscInt       checkedin;
448:   MPI_Status     status;
449:   PetscReal      f,x[NPARAMETERS];

453:   checkedin=0;
454:   while(checkedin < user->size-1) {
455:     MPI_Recv(&f,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);
456:     checkedin++;
457:     PetscMemzero(x,NPARAMETERS*sizeof(PetscReal));
458:     MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,DIE_TAG,PETSC_COMM_WORLD);
459:   }
460:   return(0);
461: }


464: /*TEST

466:    build:
467:       requires: !complex

469:    test:
470:       nsize: 3
471:       args: -tao_smonitor -tao_max_it 100 -tao_type pounders
472:       TODO: too many inconsistent results across machines

474: TEST*/