Actual source code: xmllogevent.c
petsc-3.9.1 2018-04-29
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmllogevent.h"
13: #include "../src/sys/logging/xmlviewer.h"
15: #if defined(PETSC_USE_LOG)
17: /*
18: * Support for nested PetscTimers
19: *
20: * PetscTimers keep track of a lot of useful information: Wall clock times,
21: * message passing statistics, flop counts. Information about the nested structure
22: * of the timers is lost. Example:
23: *
24: * 7:30 Start: awake
25: * 7:30 Start: morning routine
26: * 7:34 Start: eat
27: * 7:43 Done: eat
28: * 7:43 Done: morning routine
29: * 8:15 Start: work
30: * 12:15 Start: eat
31: * 12:45 Done: eat
32: * 16:00 Done: work
33: * 16:30 Start: evening routine
34: * 18:30 Start: eat
35: * 19:15 Done: eat
36: * 22:00 Done: evening routine
37: * 22:00 Done: awake
38: *
39: * Petsc timers provide the following timer results:
40: *
41: * awake: 1 call 14:30 hours
42: * morning routine: 1 call 0:13 hours
43: * eat: 3 calls 1:24 hours
44: * work: 1 call 7:45 hours
45: * evening routine: 1 call 5:30 hours
46: *
47: * Nested timers can be used to get the following table:
48: *
49: * [1 call]: awake 14:30 hours
50: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
51: * [1 call]: eat 0:09 hours (69 % of morning routine)
52: * rest (morning routine) 0:04 hours (31 % of morning routine)
53: * [1 call]: work 7:45 hours (53 % of awake)
54: * [1 call]: eat 0:30 hours ( 6 % of work)
55: * rest (work) 7:15 hours (94 % of work)
56: * [1 call]: evening routine 5:30 hours (38 % of awake)
57: * [1 call]: eat 0:45 hours (14 % of evening routine)
58: * rest (evening routine) 4:45 hours (86 % of evening routine)
59: *
60: * We ignore the concept of 'stages', because stages seem to conflict with the nested timers,
61: * or at least, the nested timers make stages unnecessary.
62: *
63: */
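/*
 * A minimal usage sketch (hypothetical event names; the calls are the standard PETSc logging API)
 * of how an application produces such a nested table:
 *
 *     PetscLogEvent DAY, EAT, WORK;
 *     PetscInitialize(&argc, &argv, NULL, NULL);
 *     PetscLogNestedBegin();
 *     PetscLogEventRegister("awake", 0, &DAY);
 *     PetscLogEventRegister("eat",   0, &EAT);
 *     PetscLogEventRegister("work",  0, &WORK);
 *     PetscLogEventBegin(DAY,0,0,0,0);
 *       PetscLogEventBegin(EAT,0,0,0,0);     timed as 'eat' nested inside 'awake'
 *       PetscLogEventEnd(EAT,0,0,0,0);
 *       PetscLogEventBegin(WORK,0,0,0,0);
 *         PetscLogEventBegin(EAT,0,0,0,0);   the same event, now reported as a child of 'work'
 *         PetscLogEventEnd(EAT,0,0,0,0);
 *       PetscLogEventEnd(WORK,0,0,0,0);
 *     PetscLogEventEnd(DAY,0,0,0,0);
 *     PetscFinalize();
 *
 * Running with -log_view :perf.xml:ascii_xml then writes the nested timing tree as XML.
 */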
65: /*
66: * Data structures for keeping track of nested timers:
67: *
68: * nestedEvents: information about the timers that have actually been activated
69: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
70: *
71: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
72: * (one or more) default timers, where one of the default timers has the same event-id as the
73: * nested one.
74: *
75: * Because of the risk of confusion between nested timer ids and default timer ids, we
76: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
77: * only for default events. Also, all nested event variables are prepended with 'nst', and
78: * default timers with 'dft'.
79: */
81: #define DFT_ID_AWAKE -1
83: typedef PetscLogEvent NestedEventId;
84: typedef struct {
85: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
86: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
87: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
88: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
90: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
91: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
92: } PetscNestedEvent;
94: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
95: static int nNestedEvents = 0;
96: static int nNestedEventsAllocated = 0;
97: static PetscNestedEvent *nestedEvents = NULL;
98: static PetscLogDouble threshTime = 0.01; /* initial value was .1 */
100: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
101: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: /*@C
104: PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
105: rates and object creation and should not slow programs down too much.
107: Logically Collective over PETSC_COMM_WORLD
109: Options Database Keys:
110: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
112: Usage:
113: .vb
114: PetscInitialize(...);
115: PetscLogNestedBegin();
116: ... code ...
117: PetscLogView(viewer);
118: PetscFinalize();
119: .ve
121: Level: advanced
123: .keywords: log, begin
124: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin()
125: @*/
126: PetscErrorCode PetscLogNestedBegin(void)
127: {
128: PetscErrorCode ierr;
130: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
132: nNestedEventsAllocated=10;
133: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
136: dftParentActive = DFT_ID_AWAKE;
137: nNestedEvents =1;
139: /* 'Awake' is nested event 0. It has no parents */
140: nestedEvents[0].nstEvent = 0;
141: nestedEvents[0].nParents = 0;
142: nestedEvents[0].dftParentsSorted = NULL;
143: nestedEvents[0].dftEvents = NULL;
144: nestedEvents[0].dftParents = NULL;
145: nestedEvents[0].dftEventsSorted = NULL;
147: PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested);
148: return(0);
149: }
151: /* Delete the data structures for the nested timers */
152: PetscErrorCode PetscLogNestedEnd(void)
153: {
154: PetscErrorCode ierr;
155: int i;
158: if (!nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents does not exist");
160: for (i=0; i<nNestedEvents; i++) {
161: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
162: }
163: PetscFree(nestedEvents);
164: nestedEvents = NULL;
165: nNestedEvents = 0;
166: nNestedEventsAllocated = 0;
167: return(0);
168: }
171: /*
172: * UTILITIES: FIND STUFF IN SORTED ARRAYS
173: *
174: * Utility: find a default timer in a sorted array */
175: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
176: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
177: int narray, /* dimension of dftArray */
178: int *entry) /* entry in the array where dftIndex may be found;
179: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
180: * In that case, the dftIndex can be inserted at this entry. */
181: {
183: if (narray==0 || dftIndex <= dftArray[0]) {
184: *entry = 0;
185: } else if (dftIndex > dftArray[narray-1]) {
186: *entry = narray;
187: } else {
188: int ihigh=narray-1, ilow=0;
189: while (ihigh>ilow) {
190: const int imiddle = (ihigh+ilow)/2;
191: if (dftArray[imiddle] > dftIndex) {
192: ihigh=imiddle;
193: } else if (dftArray[imiddle]<dftIndex) {
194: ilow =imiddle+1;
195: } else {
196: ihigh=imiddle;
197: ilow =imiddle;
198: }
199: }
200: *entry = ihigh;
201: }
202: return(0);
203: }
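/*
 * Worked example of the search contract above (arbitrary illustrative values):
 *
 *     PetscLogEvent arr[3] = {2, 5, 9};
 *     int           entry;
 *     PetscLogEventFindDefaultTimer( 5, arr, 3, &entry);    entry == 1; arr[1] == 5, so 5 was found
 *     PetscLogEventFindDefaultTimer( 7, arr, 3, &entry);    entry == 2; arr[2] != 7, so 7 is absent
 *                                                           and may be inserted at position 2
 *     PetscLogEventFindDefaultTimer(12, arr, 3, &entry);    entry == 3 == narray, the append position
 */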
205: /* Utility: find the nested event with given identification */
206: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
207: int *entry) /* entry in the nestedEvents where nstEvent may be found;
208: if nestedEvents[entry].nstEvent != nstEvent, then nstEvent is not part of nestedEvents */
209: {
212: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
213: *entry = 0;
214: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
215: *entry = nNestedEvents;
216: } else {
217: int ihigh=nNestedEvents-1, ilow=0;
218: while (ihigh>ilow) {
219: const int imiddle = (ihigh+ilow)/2;
220: if (nestedEvents[imiddle].nstEvent > nstEvent) {
221: ihigh=imiddle;
222: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
223: ilow =imiddle+1;
224: } else {
225: ihigh=imiddle;
226: ilow =imiddle;
227: }
228: }
229: *entry = ihigh;
230: }
231: return(0);
232: }
234: /******************************************************************************************/
235: /* Start a nested event */
236: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
237: {
238: PetscErrorCode ierr;
239: int entry, pentry, tentry,i;
240: PetscLogEvent dftEvent;
243: PetscLogEventFindNestedTimer(nstEvent, &entry);
244: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
245: /* Nested event doesn't exist yet: create it */
247: if (nNestedEvents==nNestedEventsAllocated) {
248: /* Enlarge and re-allocate nestedEvents if needed */
249: PetscNestedEvent *tmp = nestedEvents;
250: PetscMalloc1(2*nNestedEvents,&nestedEvents);
251: nNestedEventsAllocated*=2;
252: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
253: PetscFree(tmp);
254: }
256: /* Clear space in nestedEvents for new nested event */
257: nNestedEvents++;
258: for (i = nNestedEvents-1; i>entry; i--) {
259: nestedEvents[i] = nestedEvents[i-1];
260: }
262: /* Create event in nestedEvents */
263: nestedEvents[entry].nstEvent = nstEvent;
264: nestedEvents[entry].nParents=1;
265: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
267: /* Fill in new event */
268: pentry = 0;
269: dftEvent = (PetscLogEvent) nstEvent;
271: nestedEvents[entry].nstEvent = nstEvent;
272: nestedEvents[entry].dftParents[pentry] = dftParentActive;
273: nestedEvents[entry].dftEvents[pentry] = dftEvent;
274: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
275: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
277: } else {
278: /* Nested event exists: find current dftParentActive among parents */
279: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
280: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
281: int nParents = nestedEvents[entry].nParents;
283: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
285: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
286: /* dftParentActive not in the list: add it to the list */
287: int i;
288: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
289: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
290: char name[100];
292: /* Register a new default timer */
293: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
294: PetscLogEventRegister(name, 0, &dftEvent);
295: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
297: /* Reallocate parents and dftEvents to make space for new parent */
298: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
299: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
300: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
301: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
302: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
303: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
305: dftParents = nestedEvents[entry].dftParents;
306: dftEvents = nestedEvents[entry].dftEvents;
307: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
308: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
310: nestedEvents[entry].nParents++;
311: nParents++;
313: for (i = nParents-1; i>pentry; i--) {
314: dftParentsSorted[i] = dftParentsSorted[i-1];
315: dftEvents[i] = dftEvents[i-1];
316: }
317: for (i = nParents-1; i>tentry; i--) {
318: dftParents[i] = dftParents[i-1];
319: dftEventsSorted[i] = dftEventsSorted[i-1];
320: }
322: /* Fill in the new default timer */
323: dftParentsSorted[pentry] = dftParentActive;
324: dftEvents[pentry] = dftEvent;
325: dftParents[tentry] = dftParentActive;
326: dftEventsSorted[tentry] = dftEvent;
328: } else {
329: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
330: dftEvent = nestedEvents[entry].dftEvents[pentry];
331: }
332: }
334: /* Start the default 'dftEvent'-timer and update the dftParentActive */
335: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
336: dftParentActive = dftEvent;
337: return(0);
338: }
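/*
 * Bookkeeping sketch for the routine above (hypothetical ids): suppose nested event 7 is begun
 * while dftParentActive is 5, and 7 has so far only been begun at the top level, so its entry
 * holds nParents = 1, dftParents = {-1} and dftEvents = {7}. Parent 5 is not found among
 * dftParentsSorted, so a new default timer named "5 -> 7" is registered, the four arrays are
 * reallocated to length 2, and the new (parent, timer) pair is inserted in sorted position.
 * Finally the new default timer is started and becomes dftParentActive, so that any timers
 * begun next are recorded as its children.
 */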
340: /* End a nested event */
341: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
342: {
343: PetscErrorCode ierr;
344: int entry, pentry, nParents;
345: PetscLogEvent *dftEventsSorted;
348: /* Find the nested event */
349: PetscLogEventFindNestedTimer(nstEvent, &entry);
350: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
351: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs: does not match %d",entry,nstEvent);
352: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
353: nParents = nestedEvents[entry].nParents;
355: /* Find the current default timer among the 'dftEvents' of this event */
356: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
358: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
359: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
361: /* Stop the default timer and update the dftParentActive */
362: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
363: dftParentActive = nestedEvents[entry].dftParents[pentry];
364: return(0);
365: }
367: /*@
368: PetscLogSetThreshold - Sets the threshold time for logging events; this is a percentage out of 100, so 1.0 means only events
369: that take 1 percent or more of the total run time are logged.
371: Logically Collective over PETSC_COMM_WORLD
373: Input Parameter:
374: . newThresh - the threshold to use
376: Output Parameter:
377: . oldThresh - the previously set threshold value
379: Options Database Keys:
380: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
382: Usage:
383: .vb
384: PetscInitialize(...);
385: PetscLogNestedBegin();
386: PetscLogSetThreshold(.1,&oldthresh);
387: ... code ...
388: PetscLogView(viewer);
389: PetscFinalize();
390: .ve
392: Level: advanced
394: .keywords: log, begin
395: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin(),
396: PetscLogNestedBegin()
397: @*/
398: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
399: {
401: if (oldThresh) *oldThresh = threshTime;
402: threshTime = newThresh;
403: return(0);
404: }
406: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
407: {
408: PetscErrorCode ierr;
409: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
410: char version[256], buildoptions[128];
411: PetscMPIInt size;
412: MPI_Comm comm;
413: size_t len;
414:
416: PetscObjectGetComm((PetscObject)viewer,&comm);
417: PetscGetArchType(arch,sizeof(arch));
418: PetscGetHostName(hostname,sizeof(hostname));
419: PetscGetUserName(username,sizeof(username));
420: PetscGetProgramName(pname,sizeof(pname));
421: PetscGetDate(date,sizeof(date));
422: PetscGetVersion(version,sizeof(version));
423: MPI_Comm_size(comm, &size);
425: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
426: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
427: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
428: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
429: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
430: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
431: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
432: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
433: #if defined(PETSC_USE_DEBUG)
434: sprintf(buildoptions, "Debug");
435: #else
436: buildoptions[0] = 0;
437: #endif
438: PetscStrlen(buildoptions,&len);
439: if (len) {
440: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
441: }
442: PetscViewerXMLEndSection(viewer, "runspecification");
443: return(0);
444: }
446: /* Print one element of the global performance table: the max, max/min ratio, and optionally
447: * the average and total over all ranks of a quantity such as time, flops, or memory.
448: */
449: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
450: {
454: PetscViewerXMLStartSection(viewer, name, desc);
455: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
456: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
457: if (avg>-1.0) {
458: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
459: }
460: if (tot>-1.0) {
461: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
462: }
463: PetscViewerXMLEndSection(viewer, name);
464: return(0);
465: }
467: /* Print the global performance: max, max/min, average and total of
468: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
469: */
470: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
471: {
472: PetscErrorCode ierr;
473: PetscLogDouble min, max, tot, ratio, avg;
474: PetscLogDouble flops, mem, red, mess;
475: PetscMPIInt size;
476: MPI_Comm comm;
479: PetscObjectGetComm((PetscObject)viewer,&comm);
480: MPI_Comm_size(comm, &size);
482: /* Must preserve reduction count before we go on */
483: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
485: /* Calculate summary information */
486: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
488: /* Time */
489: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
490: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
491: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
492: avg = (tot)/((PetscLogDouble) size);
493: if (min != 0.0) ratio = max/min;
494: else ratio = 0.0;
495: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
497: /* Objects */
498: avg = (PetscLogDouble) petsc_numObjects;
499: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
500: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
501: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
502: avg = (tot)/((PetscLogDouble) size);
503: if (min != 0.0) ratio = max/min;
504: else ratio = 0.0;
505: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
507: /* Flop */
508: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
509: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
510: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
511: avg = (tot)/((PetscLogDouble) size);
512: if (min != 0.0) ratio = max/min;
513: else ratio = 0.0;
514: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
516: /* Flop/sec -- Must talk to Barry here */
517: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
518: else flops = 0.0;
519: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
520: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
521: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
522: avg = (tot)/((PetscLogDouble) size);
523: if (min != 0.0) ratio = max/min;
524: else ratio = 0.0;
525: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
527: /* Memory */
528: PetscMallocGetMaximumUsage(&mem);
529: if (mem > 0.0) {
530: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
531: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
532: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
533: avg = (tot)/((PetscLogDouble) size);
534: if (min != 0.0) ratio = max/min;
535: else ratio = 0.0;
536: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
537: }
538: /* Messages */
539: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
540: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
541: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
542: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
543: avg = (tot)/((PetscLogDouble) size);
544: if (min != 0.0) ratio = max/min;
545: else ratio = 0.0;
546: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
548: /* Message Volume */
549: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
550: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
551: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
552: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
553: avg = (tot)/((PetscLogDouble) size);
554: if (min != 0.0) ratio = max/min;
555: else ratio = 0.0;
556: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
558: /* Reductions */
559: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
560: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
561: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
562: if (min != 0.0) ratio = max/min;
563: else ratio = 0.0;
564: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
565: PetscViewerXMLEndSection(viewer, "globalperformance");
566: return(0);
567: }
569: typedef struct {
570: PetscLogEvent dftEvent;
571: NestedEventId nstEvent;
572: PetscLogEvent dftParent;
573: NestedEventId nstParent;
574: PetscBool own;
575: int depth;
576: NestedEventId* nstPath;
577: } PetscNestedEventTree;
579: /* Compare timers to sort them in the tree */
580: static int compareTreeItems(const void *item1_, const void *item2_)
581: {
582: int i;
583: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
584: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
585: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
586: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
587: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
588: }
589: if (item1->depth < item2->depth) return -1;
590: if (item1->depth > item2->depth) return 1;
591: return 0;
592: }
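/*
 * Example of the ordering produced by this comparison (hypothetical nstPath values):
 * {3}, {3,5}, {3,5,7}, {3,9}, {4} : lexicographic on the nested path, so every timer is
 * immediately followed by its own subtree.
 */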
593: /*
594: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
595: * calls that happen in some processes, but not in others.
596: *
597: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
598: * The tree is sorted so that the timers can be printed in the order of appearance.
599: *
600: * For tree-items which appear in the trees of multiple processes (which will be most items), the
601: * following rule is followed:
602: * + if information from my own process is available, then that is the information stored in tree.
603: * otherwise it is some other process's information.
604: */
605: static PetscErrorCode PetscCreateLogTreeNested(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
606: {
607: PetscNestedEventTree *tree = NULL, *newTree;
608: int *treeIndices;
609: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
610: int yesno;
611: PetscBool done;
612: PetscErrorCode ierr;
613: int maxdepth;
614: int depth;
615: int illegalEvent;
616: int iextra;
617: PetscStageLog stageLog;
618: NestedEventId *nstPath, *nstMyPath;
619: MPI_Comm comm;
622: PetscObjectGetComm((PetscObject)viewer,&comm);
623: PetscLogGetStageLog(&stageLog);
625: /* Calculate memory needed to store everybody's information and allocate tree */
626: nTimers = 0;
627: for (i=0; i<nNestedEvents; i++) nTimers+=nestedEvents[i].nParents;
629: PetscMalloc1(nTimers,&tree);
631: /* Fill tree with readily available information */
632: iTimer0 = 0;
633: maxDefaultTimer =0;
634: for (i=0; i<nNestedEvents; i++) {
635: int nParents = nestedEvents[i].nParents;
636: NestedEventId nstEvent = nestedEvents[i].nstEvent;
637: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
638: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
639: for (j=0; j<nParents; j++) {
640: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
642: tree[iTimer0+j].dftEvent = dftEvents[j];
643: tree[iTimer0+j].nstEvent = nstEvent;
644: tree[iTimer0+j].dftParent = dftParentsSorted[j];
645: tree[iTimer0+j].own = PETSC_TRUE;
647: tree[iTimer0+j].nstParent = 0;
648: tree[iTimer0+j].depth = 0;
649: tree[iTimer0+j].nstPath = NULL;
650: }
651: iTimer0 += nParents;
652: }
654: /* Calculate the global maximum for the default timer index, so array treeIndices can
655: * be allocated only once */
656: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
657: maxDefaultTimer = j;
659: /* Find default timer's place in the tree */
660: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
661: treeIndices[0] = 0;
662: for (i=0; i<nTimers; i++) {
663: PetscLogEvent dftEvent = tree[i].dftEvent;
664: treeIndices[dftEvent] = i;
665: }
667: /* Find each dftParent's nested identification */
668: for (i=0; i<nTimers; i++) {
669: PetscLogEvent dftParent = tree[i].dftParent;
670: if (dftParent!= DFT_ID_AWAKE) {
671: int j = treeIndices[dftParent];
672: tree[i].nstParent = tree[j].nstEvent;
673: }
674: }
676: /* Find depths for each timer path */
677: done = PETSC_FALSE;
678: maxdepth = 0;
679: while (!done) {
680: done = PETSC_TRUE;
681: for (i=0; i<nTimers; i++) {
682: if (tree[i].dftParent == DFT_ID_AWAKE) {
683: tree[i].depth = 1;
684: maxdepth = PetscMax(1,maxdepth);
685: } else {
686: int j = treeIndices[tree[i].dftParent];
687: depth = 1+tree[j].depth;
688: if (depth>tree[i].depth) {
689: done = PETSC_FALSE;
690: tree[i].depth = depth;
691: maxdepth = PetscMax(depth,maxdepth);
692: }
693: }
694: }
695: }
697: /* Allocate the paths in the entire tree */
698: for (i=0; i<nTimers; i++) {
699: depth = tree[i].depth;
700: PetscCalloc1(depth,&tree[i].nstPath);
701: }
703: /* Calculate the paths for all timers */
704: for (depth=1; depth<=maxdepth; depth++) {
705: for (i=0; i<nTimers; i++) {
706: if (tree[i].depth==depth) {
707: if (depth>1) {
708: int j = treeIndices[tree[i].dftParent];
709: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
710: }
711: tree[i].nstPath[depth-1] = tree[i].nstEvent;
712: }
713: }
714: }
715: PetscFree(treeIndices);
717: /* Sort the tree on basis of the paths */
718: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
720: /* Allocate an array to store paths */
721: depth = maxdepth;
722: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
723: PetscMalloc1(maxdepth+1, &nstPath);
724: PetscMalloc1(maxdepth+1, &nstMyPath);
726: /* Find an illegal nested event index (1+largest nested event index) */
727: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
728: i = illegalEvent;
729: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
731: /* First, detect timers which are not available in this process, but are available in others
732: * Allocate a new tree, that can contain all timers
733: * Then, fill the new tree with all (own and not-own) timers */
734: newTree= NULL;
735: for (yesno=0; yesno<=1; yesno++) {
736: depth = 1;
737: i = 0;
738: iextra = 0;
739: while (depth>0) {
740: int j;
741: PetscBool same;
743: /* Construct the next path in this process's tree:
744: * if necessary, supplement with invalid path entries */
745: depth++;
746: if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
747: if (i<nTimers) {
748: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
749: for (j=tree[i].depth; j<depth; j++) nstMyPath[j] = illegalEvent;
750: } else {
751: for (j=0; j<depth; j++) nstMyPath[j] = illegalEvent;
752: }
753:
754: /* Communicate with other processes to obtain the next path and its depth */
755: MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
756: for (j=depth-1; j>=0; j--) {
757: if (nstPath[j]==illegalEvent) depth=j;
758: }
759:
760: if (depth>0) {
761: /* If the path exists */
762:
763: /* check whether the next path is the same as this process's next path */
764: same = PETSC_TRUE;
765: for (j=0; same && j<depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
766:
767: if (same) {
768: /* Register 'own path' */
769: if (newTree) newTree[i+iextra] = tree[i];
770: i++;
771: } else {
772: /* Register 'not an own path' */
773: if (newTree) {
774: newTree[i+iextra].nstEvent = nstPath[depth-1];
775: newTree[i+iextra].own = PETSC_FALSE;
776: newTree[i+iextra].depth = depth;
777: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
778: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
780: newTree[i+iextra].dftEvent = 0;
781: newTree[i+iextra].dftParent = 0;
782: newTree[i+iextra].nstParent = 0;
783: }
784: iextra++;
785: }
787: }
788: }
790: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
791: totalNTimers = nTimers + iextra;
792: if (!newTree) {
793: PetscMalloc1(totalNTimers, &newTree);
794: }
795: }
796: PetscFree(nstPath);
797: PetscFree(nstMyPath);
798: PetscFree(tree);
799: tree = newTree;
800: newTree = NULL;
802: /* Set return value and return */
803: *p_tree = tree;
804: *p_nTimers = totalNTimers;
805: return(0);
806: }
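/*
 * Sketch of the merge loop above (hypothetical paths): if one process holds the sorted local
 * paths {1},{1,4} and another holds {1},{1,6}, then in every round each process pads its next
 * candidate path with illegalEvent and the element-wise MPI_MIN over all ranks selects the
 * globally smallest remaining path. Ranks whose own path equals that minimum advance i; the
 * other ranks insert a 'not own' placeholder and advance iextra. The outer yesno loop runs
 * this twice: the first pass only counts nTimers+iextra so that newTree can be allocated,
 * the second pass fills it.
 */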
808: /*
809: * Delete the nested timer tree
810: */
811: static PetscErrorCode PetscLogFreeNestedTree(PetscNestedEventTree *tree, int nTimers)
812: {
813: int i;
814: PetscErrorCode ierr;
815:
817: for (i=0; i<nTimers; i++) {
818: PetscFree(tree[i].nstPath);
819: }
820: PetscFree(tree);
821: return(0);
822: }
824: /* Print one performance metric on a line of the nested timer table: either the min and max over
825: * all ranks (when they differ by more than the given threshold) or a single averaged value.
826: */
827: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
828: {
832: PetscViewerXMLStartSection(viewer, name, NULL);
833: if (maxvalue>minvalue*minmaxtreshold) {
834: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
835: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
836: } else {
837: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
838: }
839: PetscViewerXMLEndSection(viewer, name);
840: return(0);
841: }
843: #define N_COMM 8
844: static PetscErrorCode PetscLogPrintNestedLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
845: {
846: PetscLogDouble time = perfInfo.time;
847: PetscLogDouble timeMx, timeMn;
848: PetscLogDouble countsPerCallMx, countsPerCallMn;
849: PetscLogDouble reductSpeedMx, reductSpeedMn;
850: PetscLogDouble flopSpeedMx, flopSpeedMn;
851: PetscLogDouble msgSpeedMx, msgSpeedMn;
852: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
854: MPI_Comm comm;
857: PetscObjectGetComm((PetscObject)viewer,&comm);
859: commarr_in[0] = time;
860: commarr_in[1] = -time;
861: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
862: timeMx = commarr_out[0];
863: timeMn = -commarr_out[1];
865: commarr_in[0] = time>=timeMx*0.001 ? perfInfo.flops/time : 0;
866: commarr_in[1] = time>=timeMx*0.001 ? perfInfo.numReductions/time : 0;
867: commarr_in[2] = time>=timeMx*0.001 ? perfInfo.messageLength/time : 0;
868: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
870: commarr_in[4] = time>=timeMx*0.001 ? -perfInfo.flops/time : -1e30;
871: commarr_in[5] = time>=timeMx*0.001 ? -perfInfo.numReductions/time : -1e30;
872: commarr_in[6] = time>=timeMx*0.001 ? -perfInfo.messageLength/time : -1e30;
873: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
875: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
877: flopSpeedMx = commarr_out[0];
878: reductSpeedMx = commarr_out[1];
879: msgSpeedMx = commarr_out[2];
880: countsPerCallMx = commarr_out[3];
882: flopSpeedMn = -commarr_out[4];
883: reductSpeedMn = -commarr_out[5];
884: msgSpeedMn = -commarr_out[6];
885: countsPerCallMn = -commarr_out[7];
887: *isPrinted = ((timeMx/totalTime) > (threshTime/100.0)) ? PETSC_TRUE : PETSC_FALSE;
888: if (*isPrinted) {
889: PetscViewerXMLStartSection(viewer, "event", NULL);
890: PetscViewerXMLPutString(viewer, "name", NULL, name);
891: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
894: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
895: /* One call per parent */
896: } else {
897: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
898: }
899:
900: if (flopSpeedMx<0.01) {
901: /* NO flops: don't print */
902: } else {
903: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
904: }
905:
906: if (msgSpeedMx<0.01) {
907: /* NO msgs: don't print */
908: } else {
909: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
910: }
911:
912: if (reductSpeedMx<0.01) {
913: /* NO reductions: don't print */
914: } else {
915: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
916: }
917: }
918: return(0);
919: }
921: /* Count the number of times the parent event was called */
923: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
924: {
925: if (tree[i].depth<=1) {
926: return 1; /* Main event: only once */
927: } else if (!tree[i].own) {
928: return 1; /* This event didn't happen in this process, but did in another */
929: } else {
930: int iParent;
931: for (iParent=i-1; iParent>=0; iParent--) {
932: if (tree[iParent].depth == tree[i].depth-1) break;
933: }
934: if (tree[iParent].depth != tree[i].depth-1) {
935: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
936: return -2;
937: } else {
938: PetscLogEvent dftEvent = tree[iParent].dftEvent;
939: return eventPerfInfo[dftEvent].count;
940: }
941: }
942: }
944: typedef struct {
945: int id;
946: PetscLogDouble val;
947: } PetscSortItem;
949: static int compareSortItems(const void *item1_, const void *item2_)
950: {
951: PetscSortItem *item1 = (PetscSortItem *) item1_;
952: PetscSortItem *item2 = (PetscSortItem *) item2_;
953: if (item1->val > item2->val) return -1;
954: if (item1->val < item2->val) return +1;
955: return 0;
956: }
958: static PetscErrorCode PetscLogNestedPrint(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, int iStart, PetscLogDouble totalTime)
959: {
960: int depth = tree[iStart].depth;
961: const char *name;
962: int parentCount, nChildren;
963: PetscSortItem *children;
964: PetscErrorCode ierr;
965: PetscEventPerfInfo *eventPerfInfo;
966: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
967: PetscLogDouble countsPerCall;
968: PetscBool wasPrinted;
969: PetscBool childWasPrinted;
970: MPI_Comm comm;
972: {
973: /* Look up the name of the event and its PerfInfo */
974: const int stage=0;
975: PetscStageLog stageLog;
976: PetscEventRegInfo *eventRegInfo;
977: PetscLogGetStageLog(&stageLog);
978: eventRegInfo = stageLog->eventLog->eventInfo;
979: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
980: name = eventRegInfo[(PetscLogEvent) tree[iStart].nstEvent].name;
981: }
983: PetscObjectGetComm((PetscObject)viewer,&comm);
985: /* Count the number of children (timers one level deeper in the tree) */
986: nChildren = 0;
987: {
988: int i;
989: for (i=iStart+1; i<nTimers; i++) {
990: if (tree[i].depth<=depth) break;
991: if (tree[i].depth == depth + 1) nChildren++;
992: }
993: }
995: if (nChildren>0) {
996: /* Create an array for the id-s and maxTimes of the children,
997: * leaving 2 spaces for self-time and other-time */
998: int i;
999: PetscLogDouble *times, *maxTimes;
1001: PetscMalloc1(nChildren+2,&children);
1002: nChildren = 0;
1003: for (i=iStart+1; i<nTimers; i++) {
1004: if (tree[i].depth<=depth) break;
1005: if (tree[i].depth == depth + 1) {
1006: children[nChildren].id = i;
1007: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1008: nChildren++;
1009: }
1010: }
1012: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
1013: PetscMalloc1(nChildren,&times);
1014: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1016: PetscMalloc1(nChildren,&maxTimes);
1017: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1018: PetscFree(times);
1020: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1021: PetscFree(maxTimes);
1022: }
1024: if (!tree[iStart].own) {
1025: /* Set values for a timer that was not activated in this process
1026: * (but was, in other processes of this run) */
1027: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1029: selfPerfInfo = myPerfInfo;
1030: otherPerfInfo = myPerfInfo;
1032: parentCount = 1;
1033: countsPerCall = 0;
1034: } else {
1035: /* Set the values for a timer that was activated in this process */
1036: int i;
1037: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1039: parentCount = countParents( tree, eventPerfInfo, iStart);
1040: myPerfInfo = eventPerfInfo[dftEvent];
1041: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1043: selfPerfInfo = myPerfInfo;
1044: otherPerfInfo.time = 0;
1045: otherPerfInfo.flops = 0;
1046: otherPerfInfo.numMessages = 0;
1047: otherPerfInfo.messageLength = 0;
1048: otherPerfInfo.numReductions = 0;
1050: for (i=0; i<nChildren; i++) {
1051: /* For all child counters: subtract the child values from self-timers */
1053: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1054: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1056: selfPerfInfo.time -= childPerfInfo.time;
1057: selfPerfInfo.flops -= childPerfInfo.flops;
1058: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1059: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1060: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1062: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1063: /* Add them to 'other' if the time is ignored in the output */
1064: otherPerfInfo.time += childPerfInfo.time;
1065: otherPerfInfo.flops += childPerfInfo.flops;
1066: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1067: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1068: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1069: }
1070: }
1071: }
1073: /* Main output for this timer */
1074: PetscLogPrintNestedLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1076: /* Now print the lines for the children */
1077: if (nChildren>0) {
1078: /* Calculate max-times for 'self' and 'other' */
1079: int i;
1080: PetscLogDouble times[2], maxTimes[2];
1081: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1082: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1083: children[nChildren+0].id = -1;
1084: children[nChildren+0].val = maxTimes[0];
1085: children[nChildren+1].id = -2;
1086: children[nChildren+1].val = maxTimes[1];
1088: /* Now sort the children (including 'self' and 'other') on total time */
1089: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1091: /* Print (or ignore) the children in descending order of total time */
1092: PetscViewerXMLStartSection(viewer,"events", NULL);
1093: for (i=0; i<nChildren+2; i++) {
1094: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1095: /* ignored: no output */
1096: } else if (children[i].id==-1) {
1097: PetscLogPrintNestedLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1098: if (childWasPrinted) {
1099: PetscViewerXMLEndSection(viewer,"event");
1100: }
1101: } else if (children[i].id==-2) {
1102: size_t len;
1103: char *otherName;
1105: PetscStrlen(name,&len);
1106: PetscMalloc1(16+len,&otherName);
1107: sprintf(otherName,"%s: other-timed",name);
1108: PetscLogPrintNestedLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1109: PetscFree(otherName);
1110: if (childWasPrinted) {
1111: PetscViewerXMLEndSection(viewer,"event");
1112: }
1113: } else {
1114: /* Print the child with a recursive call to this function */
1115: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1116: }
1117: }
1118: PetscViewerXMLEndSection(viewer,"events");
1119: PetscFree(children);
1120: }
1122: if (wasPrinted) {
1123: PetscViewerXMLEndSection(viewer, "event");
1124: }
1125: return 0;
1126: }
1128: static PetscErrorCode PetscLogNestedPrintTop(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, PetscLogDouble totalTime)
1129: {
1130: int nChildren;
1131: PetscSortItem *children;
1132: PetscErrorCode ierr;
1133: PetscEventPerfInfo *eventPerfInfo;
1134: MPI_Comm comm;
1137: PetscObjectGetComm((PetscObject)viewer,&comm);
1138: {
1139: /* Look up the PerfInfo */
1140: const int stage=0;
1141: PetscStageLog stageLog;
1142: PetscLogGetStageLog(&stageLog);
1143: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1144: }
1146: /* Count the number of top-level (depth 1) children */
1147: nChildren = 0;
1148: {
1149: int i;
1150: for (i=0; i<nTimers; i++) {
1151: if (tree[i].depth==1) nChildren++;
1152: }
1153: }
1155: if (nChildren>0) {
1156: /* Create an array for the id-s and maxTimes of the children,
1157: * leaving 2 spaces for self-time and other-time */
1158: int i;
1159: PetscLogDouble *times, *maxTimes;
1161: PetscMalloc1(nChildren,&children);
1162: nChildren = 0;
1163: for (i=0; i<nTimers; i++) {
1164: if (tree[i].depth == 1) {
1165: children[nChildren].id = i;
1166: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1167: nChildren++;
1168: }
1169: }
1170:
1171: /* Calculate the children's maximum times, to sort them */
1172: PetscMalloc1(nChildren,&times);
1173: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1175: PetscMalloc1(nChildren,&maxTimes);
1176: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1177: PetscFree(times);
1179: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1180: PetscFree(maxTimes);
1182: /* Now sort the children on total time */
1183: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1184: /* Print (or ignore) the children in descending order of total time */
1185: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1186: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1187: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, threshTime, "%f");
1189: for (i=0; i<nChildren; i++) {
1190: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1191: /* ignored: no output */
1192: } else {
1193: /* Print the child with a recursive call to this function */
1194: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1195: }
1196: }
1197: PetscViewerXMLEndSection(viewer, "timertree");
1198: PetscFree(children);
1199: }
1200: return(0);
1201: }
1203: typedef struct {
1204: char *name;
1205: PetscLogDouble time;
1206: PetscLogDouble flops;
1207: PetscLogDouble numMessages;
1208: PetscLogDouble messageLength;
1209: PetscLogDouble numReductions;
1210: } PetscSelfTimer;
1212: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1213: {
1214: PetscErrorCode ierr;
1215: PetscEventPerfInfo *eventPerfInfo;
1216: PetscEventRegInfo *eventRegInfo;
1217: PetscSelfTimer *selftimes;
1218: PetscSelfTimer *totaltimes;
1219: NestedEventId *nstEvents;
1220: int i, maxDefaultTimer;
1221: NestedEventId nst;
1222: PetscLogEvent dft;
1223: int nstMax, nstMax_local;
1224: MPI_Comm comm;
1225:
1227: PetscObjectGetComm((PetscObject)viewer,&comm);
1228: {
1229: const int stage=0;
1230: PetscStageLog stageLog;
1231: PetscLogGetStageLog(&stageLog);
1232: eventRegInfo = stageLog->eventLog->eventInfo;
1233: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1234: }
1236: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1237: maxDefaultTimer =0;
1238: for (i=0; i<nNestedEvents; i++) {
1239: int nParents = nestedEvents[i].nParents;
1240: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1241: int j;
1242: for (j=0; j<nParents; j++) {
1243: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1244: }
1245: }
1246: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1247: for (dft=0; dft<=maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1248: for (i=0; i<nNestedEvents; i++) {
1249: int nParents = nestedEvents[i].nParents;
1250: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1251: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1252: int j;
1253: for (j=0; j<nParents; j++) { nstEvents[dftEvents[j]] = nstEvent; }
1254: }
1256: /* Calculate largest nested event-ID */
1257: nstMax_local = 0;
1258: for (i=0; i<nNestedEvents; i++) { if (nestedEvents[i].nstEvent>nstMax_local) {nstMax_local = nestedEvents[i].nstEvent;} }
1259: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1262: /* Initialize all total-times with zero */
1263: PetscMalloc1(nstMax+1,&selftimes);
1264: PetscMalloc1(nstMax+1,&totaltimes);
1265: for (nst=0; nst<=nstMax; nst++) {
1266: totaltimes[nst].time = 0;
1267: totaltimes[nst].flops = 0;
1268: totaltimes[nst].numMessages = 0;
1269: totaltimes[nst].messageLength = 0;
1270: totaltimes[nst].numReductions = 0;
1271: totaltimes[nst].name = NULL;
1272: }
1274: /* Calculate total-times */
1275: for (i=0; i<nNestedEvents; i++) {
1276: const int nParents = nestedEvents[i].nParents;
1277: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1278: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1279: int j;
1280: for (j=0; j<nParents; j++) {
1281: const PetscLogEvent dftEvent = dftEvents[j];
1282: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1283: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1284: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1285: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1286: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1287: }
1288: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1289: }
1291: /* Initialize: self-times := totaltimes */
1292: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1294: /* Subtract timed child events from self-times */
1295: for (i=0; i<nNestedEvents; i++) {
1296: const int nParents = nestedEvents[i].nParents;
1297: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1298: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1299: int j;
1300: for (j=0; j<nParents; j++) {
1301: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1302: const PetscLogEvent dftEvent = dftEvents[j];
1303: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1304: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1305: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1306: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1307: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1308: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1309: }
1310: }
1311: }
1313: PetscFree(nstEvents);
1314: PetscFree(totaltimes);
1316: /* Set outputs */
1317: *p_self = selftimes;
1318: *p_nstMax = nstMax;
1319: return(0);
1320: }
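/*
 * Illustration of the self-times computed above (numbers taken from the example at the top of
 * this file): if 'work' ran for 7:45 in total and its only timed child, 'eat', ran for 0:30
 * under it, the self-time of 'work' is 7:15. In general, the self-time of a nested event is
 * the sum over its default timers minus the sums of all default timers that have one of those
 * timers as their parent.
 */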
1322: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1323: {
1324: PetscErrorCode ierr;
1325: int i;
1326: NestedEventId nst;
1327: PetscSortItem *sortSelfTimes;
1328: PetscLogDouble *times, *maxTimes;
1329: PetscEventRegInfo *eventRegInfo;
1330: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1331: PetscBool wasPrinted;
1332: MPI_Comm comm;
1335: PetscObjectGetComm((PetscObject)viewer,&comm);
1336: {
1337: PetscStageLog stageLog;
1338: PetscLogGetStageLog(&stageLog);
1339: eventRegInfo = stageLog->eventLog->eventInfo;
1340: }
1342: PetscMalloc1(nstMax+1,&times);
1343: PetscMalloc1(nstMax+1,&maxTimes);
1344: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1345: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1346: PetscFree(times);
1348: PetscMalloc1(nstMax+1,&sortSelfTimes);
1350: /* Sort the self-timers on basis of the largest time needed */
1351: for (nst=0; nst<=nstMax; nst++) {
1352: sortSelfTimes[nst].id = nst;
1353: sortSelfTimes[nst].val = maxTimes[nst];
1354: }
1355: PetscFree(maxTimes);
1356: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1358: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1359: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1361: for (i=0; i<=nstMax; i++) {
1362: if ((sortSelfTimes[i].val/totalTime) >= (threshTime/100.0)) {
1363: NestedEventId nstEvent = sortSelfTimes[i].id;
1364: PetscEventPerfInfo selfPerfInfo;
1365: const char *name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1367: selfPerfInfo.time = selftimes[nstEvent].time ;
1368: selfPerfInfo.flops = selftimes[nstEvent].flops;
1369: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1370: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1371: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1372:
1373: PetscLogPrintNestedLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1374: if (wasPrinted){
1375: PetscViewerXMLEndSection(viewer, "event");
1376: }
1377: }
1378: }
1379: PetscViewerXMLEndSection(viewer, "selftimertable");
1380: PetscFree(sortSelfTimes);
1381: return(0);
1382: }
1384: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1385: {
1386: MPI_Comm comm;
1387: PetscErrorCode ierr;
1388: PetscLogDouble locTotalTime, globTotalTime;
1389: PetscNestedEventTree *tree = NULL;
1390: PetscSelfTimer *selftimers = NULL;
1391: int nTimers = 0, nstMax = 0;
1392: PetscViewerType vType;
1395: PetscViewerGetType(viewer,&vType);
1397: /* Switch the viewer to XML output, which controls the format in all the local PetscPrint.. functions */
1398: PetscViewerInitASCII_XML(viewer);
1400: PetscObjectGetComm((PetscObject)viewer,&comm);
1402: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1403: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1405: /* Get the total elapsed time, local and global maximum */
1406: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1407: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1409: /* Print global information about this run */
1410: PetscPrintExeSpecs(viewer);
1411: PetscPrintGlobalPerformance(viewer, locTotalTime);
1412:
1413: /* Collect nested timer tree info from all processes */
1414: PetscCreateLogTreeNested(viewer, &tree, &nTimers);
1415: PetscLogNestedPrintTop(viewer, tree, nTimers, globTotalTime);
1416: PetscLogFreeNestedTree(tree, nTimers);
1418: /* Calculate self-time for all (not-nested) events */
1419: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1420: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1421: PetscFree(selftimers);
1423: PetscViewerXMLEndSection(viewer, "petscroot");
1424: PetscViewerFinalASCII_XML(viewer);
1425: PetscLogNestedEnd();
1426: return(0);
1427: }
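/*
 * Typical use of the routine above (hypothetical program and file names): run a PETSc
 * application as
 *
 *     ./app -log_view :perf.xml:ascii_xml
 *
 * so that nested logging is switched on during initialization and PetscLogView() dispatches
 * here at PetscFinalize(), writing the run specification, global performance, timer tree and
 * self-timer tables to perf.xml.
 */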
1429: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1430: {
1436: petsc_send_ct++;
1438: PetscMPITypeSize(&petsc_send_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1440: return(0);
1441: }
1443: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1444: {
1450: petsc_recv_ct++;
1452: PetscMPITypeSize(&petsc_recv_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1454: return(0);
1455: }
1457: PETSC_EXTERN PetscErrorCode PetscAReduce()
1458: {
1460: petsc_allreduce_ct++;
1461: return(0);
1462: }
1464: #endif