Actual source code: xmllogevent.c

petsc-3.9.1 2018-04-29
Report Typos and Errors
  1: /*************************************************************************************
  2:  *    M A R I T I M E  R E S E A R C H  I N S T I T U T E  N E T H E R L A N D S     *
  3:  *************************************************************************************
  4:  *    authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij                    *
  5:  *************************************************************************************
  6:  *    content: Support for nested PetscTimers                                        *
  7:  *************************************************************************************/
  8:  #include <petsclog.h>
  9:  #include <petsc/private/logimpl.h>
 10:  #include <petsctime.h>
 11:  #include <petscviewer.h>
 12: #include "../src/sys/logging/xmllogevent.h"
 13: #include "../src/sys/logging/xmlviewer.h"

 15: #if defined(PETSC_USE_LOG)

 17: /*
 18:  * Support for nested PetscTimers
 19:  *
 20:  * PetscTimers keep track of a lot of useful information: Wall clock times, 
 21:  * message passing statistics, flop counts.  Information about the nested structure 
 22:  * of the timers is lost. Example: 
 23:  *
 24:  * 7:30   Start: awake
 25:  * 7:30      Start: morning routine
 26:  * 7:40         Start: eat
 27:  * 7:49         Done:  eat
 28:  * 7:43      Done:  morning routine
 29:  * 8:15      Start: work
 30:  * 12:15        Start: eat
 31:  * 12:45        Done:  eat
 32:  * 16:00     Done:  work
 33:  * 16:30     Start: evening routine
 34:  * 18:30        Start: eat
 35:  * 19:15        Done:  eat
 36:  * 22:00     Done:  evening routine
 37:  * 22:00  Done:  awake
 38:  *
 39:  * Petsc timers provide the following timer results:
 40:  *
 41:  *    awake:              1 call    14:30 hours
 42:  *    morning routine:    1 call     0:13 hours
 43:  *    eat:                3 calls    1:24 hours
 44:  *    work:               1 call     7:45 hours
 45:  *    evening routine     1 call     5:30 hours
 46:  *
 47:  * Nested timers can be used to get the following table:
 48:  *
 49:  *   [1 call]: awake                14:30 hours
 50:  *   [1 call]:    morning routine         0:13 hours         ( 2 % of awake)
 51:  *   [1 call]:       eat                       0:09 hours         (69 % of morning routine)
 52:  *                   rest (morning routine)    0:04 hours         (31 % of morning routine)
 53:  *   [1 call]:    work                    7:45 hours         (53 % of awake)
 54:  *   [1 call]:       eat                       0:30 hours         ( 6 % of work)
 55:  *                   rest (work)               7:15 hours         (94 % of work)
 56:  *   [1 call]:    evening routine         5:30 hours         (38 % of awake)
 57:  *   [1 call]:       eat                       0:45 hours         (14 % of evening routine)
 58:  *                   rest (evening routine)    4:45 hours         (86 % of morning routine)
 59:  *
 60:  * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least, 
 61:  * the nested timers make the stages unnecessary.
 62:  *
 63:  */

 65: /*
 66:  * Data structures for keeping track of nested timers:
 67:  *
 68:  *   nestedEvents: information about the timers that have actually been activated
 69:  *   dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
 70:  *
 71:  * The Default-timers are used to time the nested timers. Every nested timer corresponds to 
 72:  * (one or more) default timers, where one of the default timers has the same event-id as the 
 73:  * nested one.
 74:  *
 75:  * Because of the risk of confusion between nested timer ids and default timer ids, we 
 76:  * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent 
 77:  * only for default events. Also, all nested event variables are prepended with 'nst', and
 78:  * default timers with 'dft'.
 79:  */

 81: #define DFT_ID_AWAKE -1

 83: typedef PetscLogEvent NestedEventId;
 84: typedef struct {
 85:   NestedEventId   nstEvent;         /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
 86:   int             nParents;         /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
 87:   PetscLogEvent  *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
 88:   PetscLogEvent  *dftEvents;        /* The default timers which represent the different 'instances' of this nested event */

 90:   PetscLogEvent  *dftParents;       /* The default timers which were the dftParentActive when this nested event was started */
 91:   PetscLogEvent  *dftEventsSorted;  /* The default timers which represent the different 'instances' of this nested event */
 92: } PetscNestedEvent;

 94: static PetscLogEvent    dftParentActive  = DFT_ID_AWAKE;
 95: static int              nNestedEvents           = 0;
 96: static int              nNestedEventsAllocated  = 0;
 97: static PetscNestedEvent *nestedEvents = NULL;
 98: static PetscLogDouble   threshTime      = 0.01; /* initial value was .1 */

100: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
101: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);

103: /*@C
104:   PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
105:   rates and object creation and should not slow programs down too much.

107:   Logically Collective over PETSC_COMM_WORLD

109:   Options Database Keys:
110: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file

112:   Usage:
113: .vb
114:       PetscInitialize(...);
115:       PetscLogNestedBegin();
116:        ... code ...
117:       PetscLogView(viewer);
118:       PetscFinalize();
119: .ve

121:   Level: advanced

123: .keywords: log, begin
124: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin()
125: @*/
126: PetscErrorCode PetscLogNestedBegin(void)
127: {
128:   PetscErrorCode    ierr;
130:   if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");

132:   nNestedEventsAllocated=10;
133:   PetscMalloc1(nNestedEventsAllocated,&nestedEvents);


136:   dftParentActive = DFT_ID_AWAKE;
137:   nNestedEvents =1;

139:   /* 'Awake' is nested event 0. It has no parents */
140:   nestedEvents[0].nstEvent          = 0;
141:   nestedEvents[0].nParents          = 0;
142:   nestedEvents[0].dftParentsSorted  = NULL;
143:   nestedEvents[0].dftEvents         = NULL;
144:   nestedEvents[0].dftParents        = NULL;
145:   nestedEvents[0].dftEventsSorted   = NULL;

147:   PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested);
148:   return(0);
149: }

151: /* Delete the data structures for the nested timers */
152: PetscErrorCode PetscLogNestedEnd(void)
153: {
154:   PetscErrorCode    ierr;
155:   int               i;

158:   if (!nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents does not exist");

160:   for (i=0; i<nNestedEvents; i++) {
161:     PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
162:   }
163:   PetscFree(nestedEvents);
164:   nestedEvents           = NULL;
165:   nNestedEvents          = 0;
166:   nNestedEventsAllocated = 0;
167:   return(0);
168: }


171: /*
172:  * UTILITIES: FIND STUFF IN SORTED ARRAYS
173:  *
174:  * Utility: find a default timer in a sorted array */
175: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex,    /* index to be found */
176:                                                     const PetscLogEvent *dftArray,  /* sorted array of PetscLogEvent-ids */
177:                                                     int narray,   /* dimension of dftArray */
178:                                                     int *entry)         /* entry in the array where dftIndex may be found; 
179:                                                                             *   if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray  
180:                                                                             *   In that case, the dftIndex can be inserted at this entry. */
181: {
183:   if (narray==0 || dftIndex <= dftArray[0]) {
184:     *entry = 0;
185:   } else if (dftIndex > dftArray[narray-1]) {
186:     *entry = narray;
187:   } else {
188:     int ihigh=narray-1,  ilow=0;
189:     while (ihigh>ilow) {
190:       const int imiddle = (ihigh+ilow)/2;
191:       if (dftArray[imiddle] > dftIndex) {
192:         ihigh=imiddle;
193:       } else if (dftArray[imiddle]<dftIndex) {
194:         ilow =imiddle+1;
195:       } else {
196:         ihigh=imiddle;
197:         ilow =imiddle;
198:       }
199:     }
200:     *entry = ihigh;
201:   }
202:   return(0);
203: }

205: /* Utility: find the nested event with given identification */
206: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
207:                                                    int *entry)          /* entry in the nestedEvents where nstEvent may be found;
208:                                                                               if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray  */
209: {

212:   if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
213:     *entry = 0;
214:   } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
215:     *entry = nNestedEvents;
216:   } else {
217:     int ihigh=nNestedEvents-1,  ilow=0;
218:     while (ihigh>ilow) {
219:       const int imiddle = (ihigh+ilow)/2;
220:       if (nestedEvents[imiddle].nstEvent > nstEvent) {
221:         ihigh=imiddle;
222:       } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
223:         ilow =imiddle+1;
224:       } else {
225:         ihigh=imiddle;
226:         ilow =imiddle;
227:       }
228:     }
229:     *entry = ihigh;
230:   }
231:   return(0);
232: }

234: /******************************************************************************************/
235: /* Start a nested event */
236: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
237: {
238:   PetscErrorCode  ierr;
239:   int             entry, pentry, tentry,i;
240:   PetscLogEvent   dftEvent;

243:   PetscLogEventFindNestedTimer(nstEvent, &entry);
244:   if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
245:     /* Nested event doesn't exist yet: create it */

247:     if (nNestedEvents==nNestedEventsAllocated) {
248:       /* Enlarge and re-allocate nestedEvents if needed */
249:       PetscNestedEvent *tmp = nestedEvents;
250:       PetscMalloc1(2*nNestedEvents,&nestedEvents);
251:       nNestedEventsAllocated*=2;
252:       PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
253:       PetscFree(tmp);
254:     }

256:     /* Clear space in nestedEvents for new nested event */
257:     nNestedEvents++;
258:     for (i = nNestedEvents-1; i>entry; i--) {
259:       nestedEvents[i] = nestedEvents[i-1];
260:     }

262:     /* Create event in nestedEvents */
263:     nestedEvents[entry].nstEvent = nstEvent;
264:     nestedEvents[entry].nParents=1;
265:     PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);

267:     /* Fill in new event */
268:     pentry = 0;
269:     dftEvent = (PetscLogEvent) nstEvent;

271:     nestedEvents[entry].nstEvent                 = nstEvent;
272:     nestedEvents[entry].dftParents[pentry]       = dftParentActive;
273:     nestedEvents[entry].dftEvents[pentry]        = dftEvent;
274:     nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
275:     nestedEvents[entry].dftEventsSorted[pentry]  = dftEvent;

277:   } else {
278:     /* Nested event exists: find current dftParentActive among parents */
279:     PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
280:     PetscLogEvent *dftEvents        = nestedEvents[entry].dftEvents;
281:     int           nParents          = nestedEvents[entry].nParents;

283:     PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);

285:     if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
286:       /* dftParentActive not in the list: add it to the list */
287:       int           i;
288:       PetscLogEvent *dftParents      = nestedEvents[entry].dftParents;
289:       PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
290:       char          name[100];

292:       /* Register a new default timer */
293:       sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
294:       PetscLogEventRegister(name, 0, &dftEvent);
295:       PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);

297:       /* Reallocate parents and dftEvents to make space for new parent */
298:       PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
299:       PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
300:       PetscMemcpy(nestedEvents[entry].dftEventsSorted,  dftEventsSorted,  nParents*sizeof(PetscLogEvent));
301:       PetscMemcpy(nestedEvents[entry].dftParents,       dftParents,       nParents*sizeof(PetscLogEvent));
302:       PetscMemcpy(nestedEvents[entry].dftEvents,        dftEvents,        nParents*sizeof(PetscLogEvent));
303:       PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);

305:       dftParents       = nestedEvents[entry].dftParents;
306:       dftEvents        = nestedEvents[entry].dftEvents;
307:       dftParentsSorted = nestedEvents[entry].dftParentsSorted;
308:       dftEventsSorted  = nestedEvents[entry].dftEventsSorted;

310:       nestedEvents[entry].nParents++;
311:       nParents++;

313:       for (i = nParents-1; i>pentry; i--) {
314:         dftParentsSorted[i] = dftParentsSorted[i-1];
315:         dftEvents[i]        = dftEvents[i-1];
316:       }
317:       for (i = nParents-1; i>tentry; i--) {
318:         dftParents[i]      = dftParents[i-1];
319:         dftEventsSorted[i] = dftEventsSorted[i-1];
320:       }

322:       /* Fill in the new default timer */
323:       dftParentsSorted[pentry] = dftParentActive;
324:       dftEvents[pentry]        = dftEvent;
325:       dftParents[tentry]       = dftParentActive;
326:       dftEventsSorted[tentry]  = dftEvent;

328:     } else {
329:       /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
330:       dftEvent = nestedEvents[entry].dftEvents[pentry];
331:     }
332:   }

334:   /* Start the default 'dftEvent'-timer and update the dftParentActive */
335:   PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
336:   dftParentActive = dftEvent;
337:   return(0);
338: }

340: /* End a nested event */
341: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
342: {
343:   PetscErrorCode  ierr;
344:   int             entry, pentry, nParents;
345:   PetscLogEvent  *dftEventsSorted;

348:   /* Find the nested event */
349:   PetscLogEventFindNestedTimer(nstEvent, &entry);
350:   if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
351:   if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
352:   dftEventsSorted = nestedEvents[entry].dftEventsSorted;
353:   nParents        = nestedEvents[entry].nParents;

355:   /* Find the current default timer among the 'dftEvents' of this event */
356:   PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);

358:   if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
359:   if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);

361:   /* Stop the default timer and update the dftParentActive */
362:   PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
363:   dftParentActive = nestedEvents[entry].dftParents[pentry];
364:   return(0);
365: }

367: /*@
368:    PetscLogSetThreshold - Set the threshold time for logging the events; this is a percentage out of 100, so 1. means any event
369:           that takes 1 or more percent of the time.

371:   Logically Collective over PETSC_COMM_WORLD

373:   Input Parameter:
374: .   newThresh - the threshold to use

376:   Output Parameter:
377: .   oldThresh - the previously set threshold value

379:   Options Database Keys:
380: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file

382:   Usage:
383: .vb
384:       PetscInitialize(...);
385:       PetscLogNestedBegin();
386:       PetscLogSetThreshold(.1,&oldthresh);
387:        ... code ...
388:       PetscLogView(viewer);
389:       PetscFinalize();
390: .ve

392:   Level: advanced

394: .keywords: log, begin
395: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin(),
396:           PetscLogNestedBegin()
397: @*/
398: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
399: {
401:   if (oldThresh) *oldThresh = threshTime;
402:   threshTime = newThresh;
403:   return(0);
404: }

406: static PetscErrorCode  PetscPrintExeSpecs(PetscViewer viewer)
407: {
408:   PetscErrorCode     ierr;
409:   char               arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
410:   char               version[256], buildoptions[128];
411:   PetscMPIInt        size;
412:   MPI_Comm           comm;
413:   size_t             len;
414: 
416:   PetscObjectGetComm((PetscObject)viewer,&comm);
417:   PetscGetArchType(arch,sizeof(arch));
418:   PetscGetHostName(hostname,sizeof(hostname));
419:   PetscGetUserName(username,sizeof(username));
420:   PetscGetProgramName(pname,sizeof(pname));
421:   PetscGetDate(date,sizeof(date));
422:   PetscGetVersion(version,sizeof(version));
423:   MPI_Comm_size(comm, &size);

425:   PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
426:   PetscViewerXMLPutString(   viewer, "executable"  , "Executable"   , pname );
427:   PetscViewerXMLPutString(   viewer, "architecture", "Architecture" , arch );
428:   PetscViewerXMLPutString(   viewer, "hostname"    , "Host"         , hostname);
429:   PetscViewerXMLPutInt(      viewer, "nprocesses"  , "Number of processes", size );
430:   PetscViewerXMLPutString(   viewer, "user"        , "Run by user"  , username);
431:   PetscViewerXMLPutString(   viewer, "date"        , "Started at"   , date);
432:   PetscViewerXMLPutString(   viewer, "petscrelease", "Petsc Release", version);
433: #if defined(PETSC_USE_DEBUG)
434:   sprintf(buildoptions, "Debug");
435: #else
436:   buildoptions[0] = 0;
437: #endif
438:   PetscStrlen(buildoptions,&len);
439:   if (len) {
440:     PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
441:   }
442:   PetscViewerXMLEndSection(viewer, "runspecification");
443:   return(0);
444: }

446: /* Print the global performance: max, max/min, average and total of 
447:  *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
448:  */
449: static PetscErrorCode  PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
450: {

454:   PetscViewerXMLStartSection(viewer, name, desc);
455:   PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
456:   PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
457:   if (avg>-1.0) {
458:     PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
459:   }
460:   if (tot>-1.0) {
461:     PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
462:   }
463:   PetscViewerXMLEndSection(viewer, name);
464:   return(0);
465: }

467: /* Print the global performance: max, max/min, average and total of 
468:  *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
469:  */
470: static PetscErrorCode  PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
471: {
472:   PetscErrorCode     ierr;
473:   PetscLogDouble     min, max, tot, ratio, avg;
474:   PetscLogDouble     flops, mem, red, mess;
475:   PetscMPIInt        size;
476:   MPI_Comm           comm;

479:   PetscObjectGetComm((PetscObject)viewer,&comm);
480:   MPI_Comm_size(comm, &size);

482:   /* Must preserve reduction count before we go on */
483:   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

485:   /* Calculate summary information */
486:   PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");

488:   /*   Time */
489:   MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
490:   MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
491:   MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
492:   avg  = (tot)/((PetscLogDouble) size);
493:   if (min != 0.0) ratio = max/min;
494:   else ratio = 0.0;
495:   PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);

497:   /*   Objects */
498:   avg  = (PetscLogDouble) petsc_numObjects;
499:   MPIU_Allreduce(&avg,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
500:   MPIU_Allreduce(&avg,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
501:   MPIU_Allreduce(&avg,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
502:   avg  = (tot)/((PetscLogDouble) size);
503:   if (min != 0.0) ratio = max/min;
504:   else ratio = 0.0;
505:   PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);

507:   /*   Flop */
508:   MPIU_Allreduce(&petsc_TotalFlops,  &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
509:   MPIU_Allreduce(&petsc_TotalFlops,  &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
510:   MPIU_Allreduce(&petsc_TotalFlops,  &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
511:   avg  = (tot)/((PetscLogDouble) size);
512:   if (min != 0.0) ratio = max/min;
513:   else ratio = 0.0;
514:   PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);

516:   /*   Flop/sec -- Must talk to Barry here */
517:   if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
518:   else flops = 0.0;
519:   MPIU_Allreduce(&flops,        &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
520:   MPIU_Allreduce(&flops,        &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
521:   MPIU_Allreduce(&flops,        &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
522:   avg  = (tot)/((PetscLogDouble) size);
523:   if (min != 0.0) ratio = max/min;
524:   else ratio = 0.0;
525:   PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);

527:   /*   Memory */
528:   PetscMallocGetMaximumUsage(&mem);
529:   if (mem > 0.0) {
530:     MPIU_Allreduce(&mem,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
531:     MPIU_Allreduce(&mem,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
532:     MPIU_Allreduce(&mem,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
533:     avg  = (tot)/((PetscLogDouble) size);
534:     if (min != 0.0) ratio = max/min;
535:     else ratio = 0.0;
536:     PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
537:   }
538:   /*   Messages */
539:   mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
540:   MPIU_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
541:   MPIU_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
542:   MPIU_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
543:   avg  = (tot)/((PetscLogDouble) size);
544:   if (min != 0.0) ratio = max/min;
545:   else ratio = 0.0;
546:   PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);

548:   /*   Message Volume */
549:   mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
550:   MPIU_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
551:   MPIU_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
552:   MPIU_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
553:   avg = (tot)/((PetscLogDouble) size);
554:   if (min != 0.0) ratio = max/min;
555:   else ratio = 0.0;
556:   PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);

558:   /*   Reductions */
559:   MPIU_Allreduce(&red,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
560:   MPIU_Allreduce(&red,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
561:   MPIU_Allreduce(&red,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
562:   if (min != 0.0) ratio = max/min;
563:   else ratio = 0.0;
564:   PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
565:   PetscViewerXMLEndSection(viewer, "globalperformance");
566:   return(0);
567: }

569: typedef struct {
570:   PetscLogEvent  dftEvent;
571:   NestedEventId  nstEvent;
572:   PetscLogEvent  dftParent;
573:   NestedEventId  nstParent;
574:   PetscBool      own;
575:   int            depth;
576:   NestedEventId* nstPath;
577: } PetscNestedEventTree;

579: /* Compare timers to sort them in the tree */
580: static int compareTreeItems(const void *item1_, const void *item2_)
581: {
582:   int                  i;
583:   PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
584:   PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
585:   for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
586:     if (item1->nstPath[i]<item2->nstPath[i]) return -1;
587:     if (item1->nstPath[i]>item2->nstPath[i]) return +1;
588:   }
589:   if (item1->depth < item2->depth) return -1;
590:   if (item1->depth > item2->depth) return 1;
591:   return 0;
592: }
593: /*
594:  * Do MPI communication to get the complete, nested calling tree for all processes: there may be
595:  * calls that happen in some processes, but not in others.
596:  *
597:  * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
598:  * The tree is sorted so that the timers can be printed in the order of appearance.
599:  *
600:  * For tree-items which appear in the trees of multiple processes (which will be most items), the 
601:  * following rule is followed:
602:  * + if information from my own process is available, then that is the information stored in tree.
603:  *   otherwise it is some other process's information.
604:  */
605: static PetscErrorCode  PetscCreateLogTreeNested(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
606: {
607:   PetscNestedEventTree *tree = NULL, *newTree;
608:   int                  *treeIndices;
609:   int                  nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
610:   int                  yesno;
611:   PetscBool            done;
612:   PetscErrorCode       ierr;
613:   int                  maxdepth;
614:   int                  depth;
615:   int                  illegalEvent;
616:   int                  iextra;
617:   PetscStageLog        stageLog;
618:   NestedEventId        *nstPath, *nstMyPath;
619:   MPI_Comm             comm;

622:   PetscObjectGetComm((PetscObject)viewer,&comm);
623:   PetscLogGetStageLog(&stageLog);

625:   /* Calculate memory needed to store everybody's information and allocate tree */
626:   nTimers = 0;
627:   for (i=0; i<nNestedEvents; i++) nTimers+=nestedEvents[i].nParents;

629:   PetscMalloc1(nTimers,&tree);

631:   /* Fill tree with readily available information */
632:   iTimer0 = 0;
633:   maxDefaultTimer =0;
634:   for (i=0; i<nNestedEvents; i++) {
635:     int           nParents          = nestedEvents[i].nParents;
636:     NestedEventId nstEvent          = nestedEvents[i].nstEvent;
637:     PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
638:     PetscLogEvent *dftEvents        = nestedEvents[i].dftEvents;
639:     for (j=0; j<nParents; j++) {
640:       maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);

642:       tree[iTimer0+j].dftEvent   = dftEvents[j];
643:       tree[iTimer0+j].nstEvent   = nstEvent;
644:       tree[iTimer0+j].dftParent  = dftParentsSorted[j];
645:       tree[iTimer0+j].own        = PETSC_TRUE;

647:       tree[iTimer0+j].nstParent  = 0;
648:       tree[iTimer0+j].depth      = 0;
649:       tree[iTimer0+j].nstPath    = NULL;
650:     }
651:     iTimer0 += nParents;
652:   }

654:   /* Calculate the global maximum for the default timer index, so array treeIndices can 
655:    * be allocated only once */
656:   MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
657:   maxDefaultTimer = j;

659:   /* Find default timer's place in the tree */
660:   PetscCalloc1(maxDefaultTimer+1,&treeIndices);
661:   treeIndices[0] = 0;
662:   for (i=0; i<nTimers; i++) {
663:     PetscLogEvent dftEvent = tree[i].dftEvent;
664:     treeIndices[dftEvent] = i;
665:   }

667:   /* Find each dftParent's nested identification */
668:   for (i=0; i<nTimers; i++) {
669:     PetscLogEvent dftParent = tree[i].dftParent;
670:     if (dftParent!= DFT_ID_AWAKE) {
671:       int j = treeIndices[dftParent];
672:       tree[i].nstParent  = tree[j].nstEvent;
673:     }
674:   }

676:   /* Find depths for each timer path */
677:   done = PETSC_FALSE;
678:   maxdepth = 0;
679:   while (!done) {
680:     done = PETSC_TRUE;
681:     for (i=0; i<nTimers; i++) {
682:       if (tree[i].dftParent == DFT_ID_AWAKE) {
683:         tree[i].depth = 1;
684:         maxdepth = PetscMax(1,maxdepth);
685:       } else {
686:         int j = treeIndices[tree[i].dftParent];
687:         depth = 1+tree[j].depth;
688:         if (depth>tree[i].depth) {
689:           done          = PETSC_FALSE;
690:           tree[i].depth = depth;
691:           maxdepth      = PetscMax(depth,maxdepth);
692:         }
693:       }
694:     }
695:   }

697:   /* Allocate the paths in the entire tree */
698:   for (i=0; i<nTimers; i++) {
699:     depth = tree[i].depth;
700:     PetscCalloc1(depth,&tree[i].nstPath);
701:   }

703:   /* Calculate the paths for all timers */
704:   for (depth=1; depth<=maxdepth; depth++) {
705:     for (i=0; i<nTimers; i++) {
706:       if (tree[i].depth==depth) {
707:         if (depth>1) {
708:           int    j = treeIndices[tree[i].dftParent];
709:           PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
710:         }
711:         tree[i].nstPath[depth-1] = tree[i].nstEvent;
712:       }
713:     }
714:   }
715:   PetscFree(treeIndices);

717:   /* Sort the tree on basis of the paths */
718:   qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);

720:   /* Allocate an array to store paths */
721:   depth = maxdepth;
722:   MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
723:   PetscMalloc1(maxdepth+1, &nstPath);
724:   PetscMalloc1(maxdepth+1, &nstMyPath);

726:   /* Find an illegal nested event index (1+largest nested event index) */
727:   illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
728:   i = illegalEvent;
729:   MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);

731:   /* First, detect timers which are not available in this process, but are available in others
732:    *        Allocate a new tree, that can contain all timers
733:    * Then,  fill the new tree with all (own and not-own) timers */
734:   newTree= NULL;
735:   for (yesno=0; yesno<=1; yesno++) {
736:     depth  = 1;
737:     i      = 0;
738:     iextra = 0;
739:     while (depth>0) {
740:       int       j;
741:       PetscBool same;

743:       /* Construct the next path in this process's tree:
744:        * if necessary, supplement with invalid path entries */
745:       depth++;
746:       if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
747:       if (i<nTimers) {
748:         for (j=0;             j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
749:         for (j=tree[i].depth; j<depth;         j++) nstMyPath[j] = illegalEvent;
750:       } else {
751:         for (j=0;             j<depth;         j++) nstMyPath[j] = illegalEvent;
752:       }
753: 
754:       /* Communicate with other processes to obtain the next path and its depth */
755:       MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
756:       for (j=depth-1; (int) j>=0; j--) {
757:         if (nstPath[j]==illegalEvent) depth=j;
758:       }
759: 
760:       if (depth>0) {
761:         /* If the path exists */
762: 
763:         /* check whether the next path is the same as this process's next path */
764:         same = PETSC_TRUE;
765:         for (j=0; same && j<depth; j++) { same = (same &&  nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
766: 
767:         if (same) {
768:           /* Register 'own path' */
769:           if (newTree) newTree[i+iextra] = tree[i];
770:           i++;
771:         } else {
772:           /* Register 'not an own path' */
773:           if (newTree) {
774:             newTree[i+iextra].nstEvent   = nstPath[depth-1];
775:             newTree[i+iextra].own        = PETSC_FALSE;
776:             newTree[i+iextra].depth      = depth;
777:             PetscMalloc1(depth, &newTree[i+iextra].nstPath);
778:             for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}

780:             newTree[i+iextra].dftEvent  = 0;
781:             newTree[i+iextra].dftParent = 0;
782:             newTree[i+iextra].nstParent = 0;
783:           }
784:           iextra++;
785:         }

787:       }
788:     }

790:     /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
791:     totalNTimers = nTimers + iextra;
792:     if (!newTree) {
793:       PetscMalloc1(totalNTimers, &newTree);
794:     }
795:   }
796:   PetscFree(nstPath);
797:   PetscFree(nstMyPath);
798:   PetscFree(tree);
799:   tree = newTree;
800:   newTree = NULL;

802:   /* Set return value and return */
803:   *p_tree    = tree;
804:   *p_nTimers = totalNTimers;
805:   return(0);
806: }

808: /*
809:  * Delete the nested timer tree 
810:  */
811: static PetscErrorCode  PetscLogFreeNestedTree(PetscNestedEventTree *tree, int nTimers)
812: {
813:   int             i;
814:   PetscErrorCode  ierr;
815: 
817:   for (i=0; i<nTimers; i++) {
818:     PetscFree(tree[i].nstPath);
819:   }
820:   PetscFree(tree);
821:   return(0);
822: }

824: /* Print the global performance: max, max/min, average and total of 
825:  *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
826:  */
827: static PetscErrorCode  PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
828: {

832:   PetscViewerXMLStartSection(viewer, name, NULL);
833:   if (maxvalue>minvalue*minmaxtreshold) {
834:     PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
835:     PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
836:   } else {
837:     PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
838:   };
839:   PetscViewerXMLEndSection(viewer, name);
840:   return(0);
841: }

843: #define N_COMM 8
844: static PetscErrorCode  PetscLogPrintNestedLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
845: {
846:   PetscLogDouble time = perfInfo.time;
847:   PetscLogDouble timeMx,          timeMn;
848:   PetscLogDouble countsPerCallMx, countsPerCallMn;
849:   PetscLogDouble reductSpeedMx,   reductSpeedMn;
850:   PetscLogDouble flopSpeedMx,     flopSpeedMn;
851:   PetscLogDouble msgSpeedMx,      msgSpeedMn;
852:   PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
854:   MPI_Comm       comm;

857:   PetscObjectGetComm((PetscObject)viewer,&comm);

859:   commarr_in[0] =  time;
860:   commarr_in[1] = -time;
861:   MPIU_Allreduce(commarr_in, commarr_out,    2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
862:   timeMx =  commarr_out[0];
863:   timeMn = -commarr_out[1];

865:   commarr_in[0] = time>=timeMx*0.001 ?  perfInfo.flops/time         : 0;
866:   commarr_in[1] = time>=timeMx*0.001 ?  perfInfo.numReductions/time : 0;
867:   commarr_in[2] = time>=timeMx*0.001 ?  perfInfo.messageLength/time : 0;
868:   commarr_in[3] = parentCount>0    ?  countsPerCall      : 0;

870:   commarr_in[4] = time>=timeMx*0.001 ? -perfInfo.flops/time         : -1e30;
871:   commarr_in[5] = time>=timeMx*0.001 ? -perfInfo.numReductions/time : -1e30;
872:   commarr_in[6] = time>=timeMx*0.001 ? -perfInfo.messageLength/time : -1e30;
873:   commarr_in[7] = parentCount>0    ? -countsPerCall      : -1e30;

875:   MPIU_Allreduce(commarr_in, commarr_out,  N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);

877:   flopSpeedMx     =  commarr_out[0];
878:   reductSpeedMx   =  commarr_out[1];
879:   msgSpeedMx      =  commarr_out[2];
880:   countsPerCallMx =  commarr_out[3];

882:   flopSpeedMn     = -commarr_out[4];
883:   reductSpeedMn   = -commarr_out[5];
884:   msgSpeedMn      = -commarr_out[6];
885:   countsPerCallMn = -commarr_out[7];

887:   *isPrinted = ((timeMx/totalTime) > (threshTime/100.0)) ? PETSC_TRUE : PETSC_FALSE;
888:   if (isPrinted) {
889:     PetscViewerXMLStartSection(viewer, "event", NULL);
890:     PetscViewerXMLPutString(viewer, "name", NULL, name);
891:     PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);


894:     if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
895:       /* One call per parent */
896:     } else {
897:       PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
898:     }
899: 
900:     if (flopSpeedMx<0.01) {
901:       /* NO flops: don't print */
902:     } else {
903:       PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
904:     }
905: 
906:     if (msgSpeedMx<0.01) {
907:       /* NO msgs: don't print */
908:     } else {
909:       PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
910:     }
911: 
912:     if (reductSpeedMx<0.01) {
913:       /* NO reductions: don't print */
914:     } else {
915:       PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
916:     }
917:   }
918:   return(0);
919: }

921: /* Count the number of times the parent event was called */

923: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
924: {
925:    if (tree[i].depth<=1) {
926:      return 1;  /* Main event: only once */
927:    } else if (!tree[i].own) {
928:      return 1;  /* This event didn't happen in this process, but did in another */
929:    } else {
930:      int iParent;
931:      for (iParent=i-1; iParent>=0; iParent--) {
932:        if (tree[iParent].depth == tree[i].depth-1) break;
933:      }
934:      if (tree[iParent].depth != tree[i].depth-1) {
935:        printf("\n\n   *****  Internal error: cannot find parent ****\n\n");
936:        return -2;
937:      } else {
938:        PetscLogEvent dftEvent  = tree[iParent].dftEvent;
939:        return eventPerfInfo[dftEvent].count;
940:      }
941:    }
942: }

944: typedef struct {
945:   int             id;
946:   PetscLogDouble  val;
947: } PetscSortItem;

949: static int compareSortItems(const void *item1_, const void *item2_)
950: {
951:   PetscSortItem *item1 = (PetscSortItem *) item1_;
952:   PetscSortItem *item2 = (PetscSortItem *) item2_;
953:   if (item1->val > item2->val) return -1;
954:   if (item1->val < item2->val) return +1;
955:   return 0;
956: }

958: static PetscErrorCode  PetscLogNestedPrint(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, int iStart, PetscLogDouble totalTime)
959: {
960:   int                depth   = tree[iStart].depth;
961:   const char         *name;
962:   int                parentCount, nChildren;
963:   PetscSortItem      *children;
964:   PetscErrorCode     ierr;
965:   PetscEventPerfInfo *eventPerfInfo;
966:   PetscEventPerfInfo myPerfInfo,  otherPerfInfo, selfPerfInfo;
967:   PetscLogDouble     countsPerCall;
968:   PetscBool          wasPrinted;
969:   PetscBool          childWasPrinted;
970:   MPI_Comm           comm;

972:   {
973:   /* Look up the name of the event and its PerfInfo */
974:      const int          stage=0;
975:      PetscStageLog      stageLog;
976:      PetscEventRegInfo  *eventRegInfo;
977:      PetscLogGetStageLog(&stageLog);
978:      eventRegInfo  = stageLog->eventLog->eventInfo;
979:      eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
980:      name          = eventRegInfo[(PetscLogEvent) tree[iStart].nstEvent].name;
981:   }

983:   PetscObjectGetComm((PetscObject)viewer,&comm);

985:   /* Count the number of child processes */
986:   nChildren = 0;
987:   {
988:     int i;
989:     for (i=iStart+1; i<nTimers; i++) {
990:       if (tree[i].depth<=depth) break;
991:       if (tree[i].depth == depth + 1) nChildren++;
992:     }
993:   }

995:   if (nChildren>0) {
996:     /* Create an array for the id-s and maxTimes of the children,
997:      *  leaving 2 spaces for self-time and other-time */
998:     int            i;
999:     PetscLogDouble *times, *maxTimes;

1001:     PetscMalloc1(nChildren+2,&children);
1002:     nChildren = 0;
1003:     for (i=iStart+1; i<nTimers; i++) {
1004:       if (tree[i].depth<=depth) break;
1005:       if (tree[i].depth == depth + 1) {
1006:         children[nChildren].id  = i;
1007:         children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1008:         nChildren++;
1009:       }
1010:     }

1012:     /* Calculate the children's maximum times, to see whether children will be ignored or printed */
1013:     PetscMalloc1(nChildren,&times);
1014:     for (i=0; i<nChildren; i++) { times[i] = children[i].val; }

1016:     PetscMalloc1(nChildren,&maxTimes);
1017:     MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1018:     PetscFree(times);

1020:     for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1021:     PetscFree(maxTimes);
1022:   }

1024:   if (!tree[iStart].own) {
1025:   /* Set values for a timer that was not activated in this process 
1026:    * (but was, in other processes of this run) */
1027:     PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));

1029:     selfPerfInfo  = myPerfInfo;
1030:     otherPerfInfo = myPerfInfo;

1032:     parentCount   = 1;
1033:     countsPerCall = 0;
1034:   } else {
1035:   /* Set the values for a timer that was activated in this process */
1036:     int           i;
1037:     PetscLogEvent dftEvent   = tree[iStart].dftEvent;

1039:     parentCount    = countParents( tree, eventPerfInfo, iStart);
1040:     myPerfInfo     = eventPerfInfo[dftEvent];
1041:     countsPerCall  = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;

1043:     selfPerfInfo                = myPerfInfo;
1044:     otherPerfInfo.time          = 0;
1045:     otherPerfInfo.flops         = 0;
1046:     otherPerfInfo.numMessages   = 0;
1047:     otherPerfInfo.messageLength = 0;
1048:     otherPerfInfo.numReductions = 0;

1050:     for (i=0; i<nChildren; i++) {
1051:       /* For all child counters: subtract the child values from self-timers */

1053:       PetscLogEvent      dftChild  = tree[children[i].id].dftEvent;
1054:       PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];

1056:       selfPerfInfo.time          -= childPerfInfo.time;
1057:       selfPerfInfo.flops         -= childPerfInfo.flops;
1058:       selfPerfInfo.numMessages   -= childPerfInfo.numMessages;
1059:       selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1060:       selfPerfInfo.numReductions -= childPerfInfo.numReductions;

1062:       if ((children[i].val/totalTime) < (threshTime/100.0)) {
1063:         /* Add them to 'other' if the time is ignored in the output */
1064:         otherPerfInfo.time          += childPerfInfo.time;
1065:         otherPerfInfo.flops         += childPerfInfo.flops;
1066:         otherPerfInfo.numMessages   += childPerfInfo.numMessages;
1067:         otherPerfInfo.messageLength += childPerfInfo.messageLength;
1068:         otherPerfInfo.numReductions += childPerfInfo.numReductions;
1069:       }
1070:     }
1071:   }

1073:   /* Main output for this timer */
1074:   PetscLogPrintNestedLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);

1076:   /* Now print the lines for the children */
1077:   if (nChildren>0) {
1078:     /* Calculate max-times for 'self' and 'other' */
1079:     int            i;
1080:     PetscLogDouble times[2], maxTimes[2];
1081:     times[0] = selfPerfInfo.time;   times[1] = otherPerfInfo.time;
1082:     MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1083:     children[nChildren+0].id = -1;
1084:     children[nChildren+0].val = maxTimes[0];
1085:     children[nChildren+1].id = -2;
1086:     children[nChildren+1].val = maxTimes[1];

1088:     /* Now sort the children (including 'self' and 'other') on total time */
1089:     qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);

1091:     /* Print (or ignore) the children in ascending order of total time */
1092:     PetscViewerXMLStartSection(viewer,"events", NULL);
1093:     for (i=0; i<nChildren+2; i++) {
1094:       if ((children[i].val/totalTime) < (threshTime/100.0)) {
1095:         /* ignored: no output */
1096:       } else if (children[i].id==-1) {
1097:         PetscLogPrintNestedLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1098:         if (childWasPrinted) {
1099:           PetscViewerXMLEndSection(viewer,"event");
1100:         }
1101:       } else if (children[i].id==-2) {
1102:         size_t  len;
1103:         char    *otherName;

1105:         PetscStrlen(name,&len);
1106:         PetscMalloc1(16+len,&otherName);
1107:         sprintf(otherName,"%s: other-timed",name);
1108:         PetscLogPrintNestedLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1109:         PetscFree(otherName);
1110:         if (childWasPrinted) {
1111:           PetscViewerXMLEndSection(viewer,"event");
1112:         }
1113:       } else {
1114:         /* Print the child with a recursive call to this function */
1115:         PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1116:       }
1117:     }
1118:     PetscViewerXMLEndSection(viewer,"events");
1119:     PetscFree(children);
1120:   }

1122:   if (wasPrinted) {
1123:     PetscViewerXMLEndSection(viewer, "event");
1124:   }
1125:   return 0;
1126: }

1128: static PetscErrorCode  PetscLogNestedPrintTop(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, PetscLogDouble totalTime)
1129: {
1130:   int                nChildren;
1131:   PetscSortItem      *children;
1132:   PetscErrorCode     ierr;
1133:   PetscEventPerfInfo *eventPerfInfo;
1134:   MPI_Comm           comm;

1137:   PetscObjectGetComm((PetscObject)viewer,&comm);
1138:   {
1139:   /* Look up the PerfInfo */
1140:      const int          stage=0;
1141:      PetscStageLog      stageLog;
1142:      PetscLogGetStageLog(&stageLog);
1143:      eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1144:   }

1146:   /* Count the number of child processes, and count total time */
1147:   nChildren = 0;
1148:   {
1149:     int i;
1150:     for (i=0; i<nTimers; i++) {
1151:       if (tree[i].depth==1) nChildren++;
1152:     }
1153:   }

1155:   if (nChildren>0) {
1156:     /* Create an array for the id-s and maxTimes of the children,
1157:      *  leaving 2 spaces for self-time and other-time */
1158:     int            i;
1159:     PetscLogDouble *times, *maxTimes;

1161:     PetscMalloc1(nChildren,&children);
1162:     nChildren = 0;
1163:     for (i=0; i<nTimers; i++) {
1164:       if (tree[i].depth == 1) {
1165:         children[nChildren].id  = i;
1166:         children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1167:         nChildren++;
1168:       }
1169:     }
1170: 
1171:     /* Calculate the children's maximum times, to sort them */
1172:     PetscMalloc1(nChildren,&times);
1173:     for (i=0; i<nChildren; i++) { times[i] = children[i].val; }

1175:     PetscMalloc1(nChildren,&maxTimes);
1176:     MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1177:     PetscFree(times);

1179:     for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1180:     PetscFree(maxTimes);

1182:     /* Now sort the children on total time */
1183:     qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1184:     /* Print (or ignore) the children in ascending order of total time */
1185:     PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1186:     PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1187:     PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, threshTime, "%f");

1189:     for (i=0; i<nChildren; i++) {
1190:       if ((children[i].val/totalTime) < (threshTime/100.0)) {
1191:         /* ignored: no output */
1192:       } else {
1193:         /* Print the child with a recursive call to this function */
1194:         PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1195:       }
1196:     }
1197:     PetscViewerXMLEndSection(viewer, "timertree");
1198:     PetscFree(children);
1199:   }
1200:   return(0);
1201: }

1203: typedef struct {
1204:   char           *name;
1205:   PetscLogDouble time;
1206:   PetscLogDouble flops;
1207:   PetscLogDouble numMessages;
1208:   PetscLogDouble messageLength;
1209:   PetscLogDouble numReductions;
1210: } PetscSelfTimer;

1212: static PetscErrorCode  PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1213: {
1214:   PetscErrorCode     ierr;
1215:   PetscEventPerfInfo *eventPerfInfo;
1216:   PetscEventRegInfo  *eventRegInfo;
1217:   PetscSelfTimer     *selftimes;
1218:   PetscSelfTimer     *totaltimes;
1219:   NestedEventId      *nstEvents;
1220:   int                i, maxDefaultTimer;
1221:   NestedEventId      nst;
1222:   PetscLogEvent      dft;
1223:   int                nstMax, nstMax_local;
1224:   MPI_Comm           comm;
1225: 
1227:   PetscObjectGetComm((PetscObject)viewer,&comm);
1228:   {
1229:     const int          stage=0;
1230:     PetscStageLog      stageLog;
1231:     PetscLogGetStageLog(&stageLog);
1232:     eventRegInfo  = stageLog->eventLog->eventInfo;
1233:     eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1234:   }

1236:   /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1237:   maxDefaultTimer =0;
1238:   for (i=0; i<nNestedEvents; i++) {
1239:     int           nParents         = nestedEvents[i].nParents;
1240:     PetscLogEvent *dftEvents       = nestedEvents[i].dftEvents;
1241:      int j;
1242:      for (j=0; j<nParents; j++) {
1243:        maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1244:      }
1245:   }
1246:   PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1247:   for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1248:   for (i=0; i<nNestedEvents; i++) {
1249:     int           nParents          = nestedEvents[i].nParents;
1250:     NestedEventId nstEvent          = nestedEvents[i].nstEvent;
1251:     PetscLogEvent *dftEvents        = nestedEvents[i].dftEvents;
1252:     int           j;
1253:     for (j=0; j<nParents; j++) { nstEvents[dftEvents[j]] = nstEvent; }
1254:   }

1256:   /* Calculate largest nested event-ID */
1257:   nstMax_local = 0;
1258:   for (i=0; i<nNestedEvents; i++) { if (nestedEvents[i].nstEvent>nstMax_local) {nstMax_local = nestedEvents[i].nstEvent;} }
1259:   MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);


1262:   /* Initialize all total-times with zero */
1263:   PetscMalloc1(nstMax+1,&selftimes);
1264:   PetscMalloc1(nstMax+1,&totaltimes);
1265:   for (nst=0; nst<=nstMax; nst++) {
1266:     totaltimes[nst].time          = 0;
1267:     totaltimes[nst].flops         = 0;
1268:     totaltimes[nst].numMessages   = 0;
1269:     totaltimes[nst].messageLength = 0;
1270:     totaltimes[nst].numReductions = 0;
1271:     totaltimes[nst].name          = NULL;
1272:   }

1274:   /* Calculate total-times */
1275:   for (i=0; i<nNestedEvents; i++) {
1276:     const int            nParents  = nestedEvents[i].nParents;
1277:     const NestedEventId  nstEvent  = nestedEvents[i].nstEvent;
1278:     const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1279:     int                  j;
1280:     for (j=0; j<nParents; j++) {
1281:       const PetscLogEvent dftEvent = dftEvents[j];
1282:       totaltimes[nstEvent].time          += eventPerfInfo[dftEvent].time;
1283:       totaltimes[nstEvent].flops         += eventPerfInfo[dftEvent].flops;
1284:       totaltimes[nstEvent].numMessages   += eventPerfInfo[dftEvent].numMessages;
1285:       totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1286:       totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1287:     }
1288:     totaltimes[nstEvent].name    = eventRegInfo[(PetscLogEvent) nstEvent].name;
1289:   }

1291:   /* Initialize: self-times := totaltimes */
1292:   for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }

1294:   /* Subtract timed subprocesses from self-times */
1295:   for (i=0; i<nNestedEvents; i++) {
1296:     const int           nParents          = nestedEvents[i].nParents;
1297:     const PetscLogEvent *dftEvents        = nestedEvents[i].dftEvents;
1298:     const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1299:     int                 j;
1300:     for (j=0; j<nParents; j++) {
1301:       if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1302:         const PetscLogEvent dftEvent  = dftEvents[j];
1303:         const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1304:         selftimes[nstParent].time          -= eventPerfInfo[dftEvent].time;
1305:         selftimes[nstParent].flops         -= eventPerfInfo[dftEvent].flops;
1306:         selftimes[nstParent].numMessages   -= eventPerfInfo[dftEvent].numMessages;
1307:         selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1308:         selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1309:       }
1310:     }
1311:   }

1313:   PetscFree(nstEvents);
1314:   PetscFree(totaltimes);

1316:   /* Set outputs */
1317:   *p_self  = selftimes;
1318:   *p_nstMax = nstMax;
1319:   return(0);
1320: }

1322: static PetscErrorCode  PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1323: {
1324:   PetscErrorCode     ierr;
1325:   int                i;
1326:   NestedEventId      nst;
1327:   PetscSortItem      *sortSelfTimes;
1328:   PetscLogDouble     *times, *maxTimes;
1329:   PetscEventRegInfo  *eventRegInfo;
1330:   const int          dum_depth = 1, dum_count=1, dum_parentcount=1;
1331:   PetscBool          wasPrinted;
1332:   MPI_Comm           comm;

1335:   PetscObjectGetComm((PetscObject)viewer,&comm);
1336:   {
1337:     PetscStageLog      stageLog;
1338:     PetscLogGetStageLog(&stageLog);
1339:     eventRegInfo  = stageLog->eventLog->eventInfo;
1340:   }

1342:   PetscMalloc1(nstMax+1,&times);
1343:   PetscMalloc1(nstMax+1,&maxTimes);
1344:   for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1345:   MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1346:   PetscFree(times);

1348:   PetscMalloc1(nstMax+1,&sortSelfTimes);

1350:   /* Sort the self-timers on basis of the largest time needed */
1351:   for (nst=0; nst<=nstMax; nst++) {
1352:     sortSelfTimes[nst].id  = nst;
1353:     sortSelfTimes[nst].val = maxTimes[nst];
1354:   }
1355:   PetscFree(maxTimes);
1356:   qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);

1358:   PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1359:   PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");

1361:   for (i=0; i<=nstMax; i++) {
1362:     if ((sortSelfTimes[i].val/totalTime) >= (threshTime/100.0)) {
1363:       NestedEventId      nstEvent = sortSelfTimes[i].id;
1364:       PetscEventPerfInfo selfPerfInfo;
1365:       const char         *name     = eventRegInfo[(PetscLogEvent) nstEvent].name;

1367:       selfPerfInfo.time          = selftimes[nstEvent].time ;
1368:       selfPerfInfo.flops         = selftimes[nstEvent].flops;
1369:       selfPerfInfo.numMessages   = selftimes[nstEvent].numMessages;
1370:       selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1371:       selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1372: 
1373:       PetscLogPrintNestedLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1374:       if (wasPrinted){
1375:         PetscViewerXMLEndSection(viewer, "event");
1376:       }
1377:     }
1378:   }
1379:   PetscViewerXMLEndSection(viewer, "selftimertable");
1380:   PetscFree(sortSelfTimes);
1381:   return(0);
1382: }

1384: PetscErrorCode  PetscLogView_Nested(PetscViewer viewer)
1385: {
1386:   MPI_Comm           comm;
1387:   PetscErrorCode     ierr;
1388:   PetscLogDouble     locTotalTime, globTotalTime;
1389:   PetscNestedEventTree *tree = NULL;
1390:   PetscSelfTimer     *selftimers = NULL;
1391:   int                nTimers = 0, nstMax = 0;
1392:   PetscViewerType    vType;

1395:   PetscViewerGetType(viewer,&vType);

1397:   /* Set useXMLFormat that controls the format in all local PetscPrint.. functions */
1398:   PetscViewerInitASCII_XML(viewer);

1400:   PetscObjectGetComm((PetscObject)viewer,&comm);

1402:   PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1403:   PetscViewerXMLStartSection(viewer, "petscroot", NULL);

1405:   /* Get the total elapsed time, local and global maximum */
1406:   PetscTime(&locTotalTime);  locTotalTime -= petsc_BaseTime;
1407:   MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);

1409:   /* Print global information about this run */
1410:   PetscPrintExeSpecs(viewer);
1411:   PetscPrintGlobalPerformance(viewer, locTotalTime);
1412: 
1413:   /* Collect nested timer tree info from all processes */
1414:   PetscCreateLogTreeNested(viewer, &tree, &nTimers);
1415:   PetscLogNestedPrintTop(viewer, tree, nTimers, globTotalTime);
1416:   PetscLogFreeNestedTree(tree, nTimers);

1418:   /* Calculate self-time for all (not-nested) events */
1419:   PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1420:   PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1421:   PetscFree(selftimers);

1423:   PetscViewerXMLEndSection(viewer, "petscroot");
1424:   PetscViewerFinalASCII_XML(viewer);
1425:   PetscLogNestedEnd();
1426:   return(0);
1427: }

1429: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1430: {
1433: #endif

1436:   petsc_send_ct++;
1438:   PetscMPITypeSize(&petsc_send_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1439: #endif
1440:   return(0);
1441: }

1443: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1444: {
1447: #endif

1450:   petsc_recv_ct++;
1452:   PetscMPITypeSize(&petsc_recv_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1453: #endif
1454:   return(0);
1455: }

1457: PETSC_EXTERN PetscErrorCode PetscAReduce()
1458: {
1460:   petsc_allreduce_ct++;
1461:   return(0);
1462: }

1464: #endif