Actual source code: xmllogevent.c
petsc-3.8.3 2017-12-09
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmllogevent.h"
13: #include "../src/sys/logging/xmlviewer.h"
15: #if defined(PETSC_USE_LOG)
17: /*
18: * Support for nested PetscTimers
19: *
20: * PetscTimers keep track of a lot of useful information: Wall clock times,
21: * message passing statistics, flop counts. Information about the nested structure
22: * of the timers is lost. Example:
23: *
24: * 7:30 Start: awake
25: * 7:30 Start: morning routine
26: * 7:40 Start: eat
27: * 7:49 Done: eat
28: * 7:43 Done: morning routine
29: * 8:15 Start: work
30: * 12:15 Start: eat
31: * 12:45 Done: eat
32: * 16:00 Done: work
33: * 16:30 Start: evening routine
34: * 18:30 Start: eat
35: * 19:15 Done: eat
36: * 22:00 Done: evening routine
37: * 22:00 Done: awake
38: *
39: * Petsc timers provide the following timer results:
40: *
41: * awake: 1 call 14:30 hours
42: * morning routine: 1 call 0:13 hours
43: * eat: 3 calls 1:24 hours
44: * work: 1 call 7:45 hours
45: * evening routine 1 call 5:30 hours
46: *
47: * Nested timers can be used to get the following table:
48: *
49: * [1 call]: awake 14:30 hours
50: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
51: * [1 call]: eat 0:09 hours (69 % of morning routine)
52: * rest (morning routine) 0:04 hours (31 % of morning routine)
53: * [1 call]: work 7:45 hours (53 % of awake)
54: * [1 call]: eat 0:30 hours ( 6 % of work)
55: * rest (work) 7:15 hours (94 % of work)
56: * [1 call]: evening routine 5:30 hours (38 % of awake)
57: * [1 call]: eat 0:45 hours (14 % of evening routine)
58: * rest (evening routine) 4:45 hours (86 % of morning routine)
59: *
60: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
61: * the nested timers make the stages unnecessary.
62: *
63: */
65: /*
66: * Data structures for keeping track of nested timers:
67: *
68: * nestedEvents: information about the timers that have actually been activated
69: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
70: *
71: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
72: * (one or more) default timers, where one of the default timers has the same event-id as the
73: * nested one.
74: *
75: * Because of the risk of confusion between nested timer ids and default timer ids, we
76: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
77: * only for default events. Also, all nested event variables are prepended with 'nst', and
78: * default timers with 'dft'.
79: */
81: #define DFT_ID_AWAKE -1
83: typedef PetscLogEvent NestedEventId;
84: typedef struct {
85: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
86: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
87: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
88: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
90: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
91: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
92: } PetscNestedEvent;
94: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
95: static int nNestedEvents = 0;
96: static int nNestedEventsAllocated = 0;
97: static PetscNestedEvent *nestedEvents = NULL;
98: static PetscLogDouble threshTime = 0.01; /* initial value was .1 */
100: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
101: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: PetscErrorCode PetscLogNestedBegin(void)
104: {
105: PetscErrorCode ierr;
107: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
109: nNestedEventsAllocated=10;
110: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
113: dftParentActive = DFT_ID_AWAKE;
114: nNestedEvents =1;
116: /* 'Awake' is nested event 0. It has no parents */
117: nestedEvents[0].nstEvent = 0;
118: nestedEvents[0].nParents = 0;
119: nestedEvents[0].dftParentsSorted = NULL;
120: nestedEvents[0].dftEvents = NULL;
121: nestedEvents[0].dftParents = NULL;
122: nestedEvents[0].dftEventsSorted = NULL;
124: PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested);
125: return(0);
126: }
128: /* Delete the data structures for the nested timers */
129: PetscErrorCode PetscLogNestedEnd(void)
130: {
131: PetscErrorCode ierr;
132: int i;
135: if (!nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents does not exist");
137: for (i=0; i<nNestedEvents; i++) {
138: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
139: }
140: PetscFree(nestedEvents);
141: nestedEvents = NULL;
142: nNestedEvents = 0;
143: nNestedEventsAllocated = 0;
144: return(0);
145: }
148: /*
149: * UTILITIES: FIND STUFF IN SORTED ARRAYS
150: *
151: * Utility: find a default timer in a sorted array */
152: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
153: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
154: int narray, /* dimension of dftArray */
155: int *entry) /* entry in the array where dftIndex may be found;
156: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
157: * In that case, the dftIndex can be inserted at this entry. */
158: {
160: if (narray==0 || dftIndex <= dftArray[0]) {
161: *entry = 0;
162: } else if (dftIndex > dftArray[narray-1]) {
163: *entry = narray;
164: } else {
165: int ihigh=narray-1, ilow=0;
166: while (ihigh>ilow) {
167: const int imiddle = (ihigh+ilow)/2;
168: if (dftArray[imiddle] > dftIndex) {
169: ihigh=imiddle;
170: } else if (dftArray[imiddle]<dftIndex) {
171: ilow =imiddle+1;
172: } else {
173: ihigh=imiddle;
174: ilow =imiddle;
175: }
176: }
177: *entry = ihigh;
178: }
179: return(0);
180: }
182: /* Utility: find the nested event with given identification */
183: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
184: int *entry) /* entry in the nestedEvents where nstEvent may be found;
185: if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray */
186: {
189: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
190: *entry = 0;
191: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
192: *entry = nNestedEvents;
193: } else {
194: int ihigh=nNestedEvents-1, ilow=0;
195: while (ihigh>ilow) {
196: const int imiddle = (ihigh+ilow)/2;
197: if (nestedEvents[imiddle].nstEvent > nstEvent) {
198: ihigh=imiddle;
199: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
200: ilow =imiddle+1;
201: } else {
202: ihigh=imiddle;
203: ilow =imiddle;
204: }
205: }
206: *entry = ihigh;
207: }
208: return(0);
209: }
211: /******************************************************************************************/
212: /* Start a nested event */
213: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
214: {
215: PetscErrorCode ierr;
216: int entry, pentry, tentry,i;
217: PetscLogEvent dftEvent;
220: PetscLogEventFindNestedTimer(nstEvent, &entry);
221: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
222: /* Nested event doesn't exist yet: create it */
224: if (nNestedEvents==nNestedEventsAllocated) {
225: /* Enlarge and re-allocate nestedEvents if needed */
226: PetscNestedEvent *tmp = nestedEvents;
227: PetscMalloc1(2*nNestedEvents,&nestedEvents);
228: nNestedEventsAllocated*=2;
229: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
230: PetscFree(tmp);
231: }
233: /* Clear space in nestedEvents for new nested event */
234: nNestedEvents++;
235: for (i = nNestedEvents-1; i>entry; i--) {
236: nestedEvents[i] = nestedEvents[i-1];
237: }
239: /* Create event in nestedEvents */
240: nestedEvents[entry].nstEvent = nstEvent;
241: nestedEvents[entry].nParents=1;
242: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
244: /* Fill in new event */
245: pentry = 0;
246: dftEvent = (PetscLogEvent) nstEvent;
248: nestedEvents[entry].nstEvent = nstEvent;
249: nestedEvents[entry].dftParents[pentry] = dftParentActive;
250: nestedEvents[entry].dftEvents[pentry] = dftEvent;
251: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
252: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
254: } else {
255: /* Nested event exists: find current dftParentActive among parents */
256: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
257: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
258: int nParents = nestedEvents[entry].nParents;
260: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
262: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
263: /* dftParentActive not in the list: add it to the list */
264: int i;
265: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
266: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
267: char name[100];
269: /* Register a new default timer */
270: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
271: PetscLogEventRegister(name, 0, &dftEvent);
272: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
274: /* Reallocate parents and dftEvents to make space for new parent */
275: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
276: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
277: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
278: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
279: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
280: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
282: dftParents = nestedEvents[entry].dftParents;
283: dftEvents = nestedEvents[entry].dftEvents;
284: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
285: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
287: nestedEvents[entry].nParents++;
288: nParents++;
290: for (i = nParents-1; i>pentry; i--) {
291: dftParentsSorted[i] = dftParentsSorted[i-1];
292: dftEvents[i] = dftEvents[i-1];
293: }
294: for (i = nParents-1; i>tentry; i--) {
295: dftParents[i] = dftParents[i-1];
296: dftEventsSorted[i] = dftEventsSorted[i-1];
297: }
299: /* Fill in the new default timer */
300: dftParentsSorted[pentry] = dftParentActive;
301: dftEvents[pentry] = dftEvent;
302: dftParents[tentry] = dftParentActive;
303: dftEventsSorted[tentry] = dftEvent;
305: } else {
306: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
307: dftEvent = nestedEvents[entry].dftEvents[pentry];
308: }
309: }
311: /* Start the default 'dftEvent'-timer and update the dftParentActive */
312: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
313: dftParentActive = dftEvent;
314: return(0);
315: }
317: /* End a nested event */
318: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
319: {
320: PetscErrorCode ierr;
321: int entry, pentry, nParents;
322: PetscLogEvent *dftEventsSorted;
325: /* Find the nested event */
326: PetscLogEventFindNestedTimer(nstEvent, &entry);
327: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
328: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
329: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
330: nParents = nestedEvents[entry].nParents;
332: /* Find the current default timer among the 'dftEvents' of this event */
333: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
335: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
336: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
338: /* Stop the default timer and update the dftParentActive */
339: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
340: dftParentActive = nestedEvents[entry].dftParents[pentry];
341: return(0);
342: }
344: /* Set the threshold time for logging the events
345: */
346: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
347: {
349: *oldThresh = threshTime;
350: threshTime = newThresh;
351: return(0);
352: }
354: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
355: {
356: PetscErrorCode ierr;
357: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
358: char version[256], buildoptions[128];
359: PetscMPIInt size;
360: MPI_Comm comm;
361: size_t len;
362:
364: PetscObjectGetComm((PetscObject)viewer,&comm);
365: PetscGetArchType(arch,sizeof(arch));
366: PetscGetHostName(hostname,sizeof(hostname));
367: PetscGetUserName(username,sizeof(username));
368: PetscGetProgramName(pname,sizeof(pname));
369: PetscGetDate(date,sizeof(date));
370: PetscGetVersion(version,sizeof(version));
371: MPI_Comm_size(comm, &size);
373: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
374: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
375: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
376: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
377: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
378: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
379: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
380: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
381: #if defined(PETSC_USE_DEBUG)
382: sprintf(buildoptions, "Debug");
383: #else
384: buildoptions[0] = 0;
385: #endif
386: PetscStrlen(buildoptions,&len);
387: if (len) {
388: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
389: }
390: PetscViewerXMLEndSection(viewer, "runspecification");
391: return(0);
392: }
394: /* Print the global performance: max, max/min, average and total of
395: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
396: */
397: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
398: {
402: PetscViewerXMLStartSection(viewer, name, desc);
403: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
404: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
405: if (avg>-1.0) {
406: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
407: }
408: if (tot>-1.0) {
409: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
410: }
411: PetscViewerXMLEndSection(viewer, name);
412: return(0);
413: }
415: /* Print the global performance: max, max/min, average and total of
416: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
417: */
418: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
419: {
420: PetscErrorCode ierr;
421: PetscLogDouble min, max, tot, ratio, avg;
422: PetscLogDouble flops, mem, red, mess;
423: PetscMPIInt size;
424: MPI_Comm comm;
427: PetscObjectGetComm((PetscObject)viewer,&comm);
428: MPI_Comm_size(comm, &size);
430: /* Must preserve reduction count before we go on */
431: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
433: /* Calculate summary information */
434: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
436: /* Time */
437: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
438: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
439: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
440: avg = (tot)/((PetscLogDouble) size);
441: if (min != 0.0) ratio = max/min;
442: else ratio = 0.0;
443: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
445: /* Objects */
446: avg = (PetscLogDouble) petsc_numObjects;
447: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
448: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
449: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
450: avg = (tot)/((PetscLogDouble) size);
451: if (min != 0.0) ratio = max/min;
452: else ratio = 0.0;
453: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
455: /* Flop */
456: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
457: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
458: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
459: avg = (tot)/((PetscLogDouble) size);
460: if (min != 0.0) ratio = max/min;
461: else ratio = 0.0;
462: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
464: /* Flop/sec -- Must talk to Barry here */
465: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
466: else flops = 0.0;
467: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
468: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
469: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
470: avg = (tot)/((PetscLogDouble) size);
471: if (min != 0.0) ratio = max/min;
472: else ratio = 0.0;
473: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
475: /* Memory */
476: PetscMallocGetMaximumUsage(&mem);
477: if (mem > 0.0) {
478: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
479: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
480: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
481: avg = (tot)/((PetscLogDouble) size);
482: if (min != 0.0) ratio = max/min;
483: else ratio = 0.0;
484: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
485: }
486: /* Messages */
487: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
488: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
489: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
490: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
491: avg = (tot)/((PetscLogDouble) size);
492: if (min != 0.0) ratio = max/min;
493: else ratio = 0.0;
494: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
496: /* Message Volume */
497: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
498: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
499: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
500: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
501: avg = (tot)/((PetscLogDouble) size);
502: if (min != 0.0) ratio = max/min;
503: else ratio = 0.0;
504: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
506: /* Reductions */
507: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
508: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
509: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
510: if (min != 0.0) ratio = max/min;
511: else ratio = 0.0;
512: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
513: PetscViewerXMLEndSection(viewer, "globalperformance");
514: return(0);
515: }
517: typedef struct {
518: PetscLogEvent dftEvent;
519: NestedEventId nstEvent;
520: PetscLogEvent dftParent;
521: NestedEventId nstParent;
522: PetscBool own;
523: int depth;
524: NestedEventId* nstPath;
525: } PetscNestedEventTree;
527: /* Compare timers to sort them in the tree */
528: static int compareTreeItems(const void *item1_, const void *item2_)
529: {
530: int i;
531: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
532: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
533: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
534: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
535: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
536: }
537: if (item1->depth < item2->depth) return -1;
538: if (item1->depth > item2->depth) return 1;
539: return 0;
540: }
541: /*
542: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
543: * calls that happen in some processes, but not in others.
544: *
545: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
546: * The tree is sorted so that the timers can be printed in the order of appearance.
547: *
548: * For tree-items which appear in the trees of multiple processes (which will be most items), the
549: * following rule is followed:
550: * + if information from my own process is available, then that is the information stored in tree.
551: * otherwise it is some other process's information.
552: */
553: static PetscErrorCode PetscCreateLogTreeNested(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
554: {
555: PetscNestedEventTree *tree = NULL, *newTree;
556: int *treeIndices;
557: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
558: int yesno;
559: PetscBool done;
560: PetscErrorCode ierr;
561: int maxdepth;
562: int depth;
563: int illegalEvent;
564: int iextra;
565: PetscStageLog stageLog;
566: NestedEventId *nstPath, *nstMyPath;
567: MPI_Comm comm;
570: PetscObjectGetComm((PetscObject)viewer,&comm);
571: PetscLogGetStageLog(&stageLog);
573: /* Calculate memory needed to store everybody's information and allocate tree */
574: nTimers = 0;
575: for (i=0; i<nNestedEvents; i++) nTimers+=nestedEvents[i].nParents;
577: PetscMalloc1(nTimers,&tree);
579: /* Fill tree with readily available information */
580: iTimer0 = 0;
581: maxDefaultTimer =0;
582: for (i=0; i<nNestedEvents; i++) {
583: int nParents = nestedEvents[i].nParents;
584: NestedEventId nstEvent = nestedEvents[i].nstEvent;
585: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
586: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
587: for (j=0; j<nParents; j++) {
588: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
590: tree[iTimer0+j].dftEvent = dftEvents[j];
591: tree[iTimer0+j].nstEvent = nstEvent;
592: tree[iTimer0+j].dftParent = dftParentsSorted[j];
593: tree[iTimer0+j].own = PETSC_TRUE;
595: tree[iTimer0+j].nstParent = 0;
596: tree[iTimer0+j].depth = 0;
597: tree[iTimer0+j].nstPath = NULL;
598: }
599: iTimer0 += nParents;
600: }
602: /* Calculate the global maximum for the default timer index, so array treeIndices can
603: * be allocated only once */
604: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
605: maxDefaultTimer = j;
607: /* Find default timer's place in the tree */
608: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
609: treeIndices[0] = 0;
610: for (i=0; i<nTimers; i++) {
611: PetscLogEvent dftEvent = tree[i].dftEvent;
612: treeIndices[dftEvent] = i;
613: }
615: /* Find each dftParent's nested identification */
616: for (i=0; i<nTimers; i++) {
617: PetscLogEvent dftParent = tree[i].dftParent;
618: if (dftParent!= DFT_ID_AWAKE) {
619: int j = treeIndices[dftParent];
620: tree[i].nstParent = tree[j].nstEvent;
621: }
622: }
624: /* Find depths for each timer path */
625: done = PETSC_FALSE;
626: maxdepth = 0;
627: while (!done) {
628: done = PETSC_TRUE;
629: for (i=0; i<nTimers; i++) {
630: if (tree[i].dftParent == DFT_ID_AWAKE) {
631: tree[i].depth = 1;
632: maxdepth = PetscMax(1,maxdepth);
633: } else {
634: int j = treeIndices[tree[i].dftParent];
635: depth = 1+tree[j].depth;
636: if (depth>tree[i].depth) {
637: done = PETSC_FALSE;
638: tree[i].depth = depth;
639: maxdepth = PetscMax(depth,maxdepth);
640: }
641: }
642: }
643: }
645: /* Allocate the paths in the entire tree */
646: for (i=0; i<nTimers; i++) {
647: depth = tree[i].depth;
648: PetscCalloc1(depth,&tree[i].nstPath);
649: }
651: /* Calculate the paths for all timers */
652: for (depth=1; depth<=maxdepth; depth++) {
653: for (i=0; i<nTimers; i++) {
654: if (tree[i].depth==depth) {
655: if (depth>1) {
656: int j = treeIndices[tree[i].dftParent];
657: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
658: }
659: tree[i].nstPath[depth-1] = tree[i].nstEvent;
660: }
661: }
662: }
663: PetscFree(treeIndices);
665: /* Sort the tree on basis of the paths */
666: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
668: /* Allocate an array to store paths */
669: depth = maxdepth;
670: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
671: PetscMalloc1(maxdepth+1, &nstPath);
672: PetscMalloc1(maxdepth+1, &nstMyPath);
674: /* Find an illegal nested event index (1+largest nested event index) */
675: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
676: i = illegalEvent;
677: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
679: /* First, detect timers which are not available in this process, but are available in others
680: * Allocate a new tree, that can contain all timers
681: * Then, fill the new tree with all (own and not-own) timers */
682: newTree= NULL;
683: for (yesno=0; yesno<=1; yesno++) {
684: depth = 1;
685: i = 0;
686: iextra = 0;
687: while (depth>0) {
688: int j;
689: PetscBool same;
691: /* Construct the next path in this process's tree:
692: * if necessary, supplement with invalid path entries */
693: depth++;
694: if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
695: if (i<nTimers) {
696: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
697: for (j=tree[i].depth; j<depth; j++) nstMyPath[j] = illegalEvent;
698: } else {
699: for (j=0; j<depth; j++) nstMyPath[j] = illegalEvent;
700: }
701:
702: /* Communicate with other processes to obtain the next path and its depth */
703: MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
704: for (j=depth-1; (int) j>=0; j--) {
705: if (nstPath[j]==illegalEvent) depth=j;
706: }
707:
708: if (depth>0) {
709: /* If the path exists */
710:
711: /* check whether the next path is the same as this process's next path */
712: same = PETSC_TRUE;
713: for (j=0; same && j<depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
714:
715: if (same) {
716: /* Register 'own path' */
717: if (newTree) newTree[i+iextra] = tree[i];
718: i++;
719: } else {
720: /* Register 'not an own path' */
721: if (newTree) {
722: newTree[i+iextra].nstEvent = nstPath[depth-1];
723: newTree[i+iextra].own = PETSC_FALSE;
724: newTree[i+iextra].depth = depth;
725: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
726: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
728: newTree[i+iextra].dftEvent = 0;
729: newTree[i+iextra].dftParent = 0;
730: newTree[i+iextra].nstParent = 0;
731: }
732: iextra++;
733: }
735: }
736: }
738: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
739: totalNTimers = nTimers + iextra;
740: if (!newTree) {
741: PetscMalloc1(totalNTimers, &newTree);
742: }
743: }
744: PetscFree(nstPath);
745: PetscFree(nstMyPath);
746: PetscFree(tree);
747: tree = newTree;
748: newTree = NULL;
750: /* Set return value and return */
751: *p_tree = tree;
752: *p_nTimers = totalNTimers;
753: return(0);
754: }
756: /*
757: * Delete the nested timer tree
758: */
759: static PetscErrorCode PetscLogFreeNestedTree(PetscNestedEventTree *tree, int nTimers)
760: {
761: int i;
762: PetscErrorCode ierr;
763:
765: for (i=0; i<nTimers; i++) {
766: PetscFree(tree[i].nstPath);
767: }
768: PetscFree(tree);
769: return(0);
770: }
772: /* Print the global performance: max, max/min, average and total of
773: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
774: */
775: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
776: {
780: PetscViewerXMLStartSection(viewer, name, NULL);
781: if (maxvalue>minvalue*minmaxtreshold) {
782: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
783: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
784: } else {
785: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
786: };
787: PetscViewerXMLEndSection(viewer, name);
788: return(0);
789: }
791: #define N_COMM 8
792: static PetscErrorCode PetscLogPrintNestedLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
793: {
794: PetscLogDouble time = perfInfo.time;
795: PetscLogDouble timeMx, timeMn;
796: PetscLogDouble countsPerCallMx, countsPerCallMn;
797: PetscLogDouble reductSpeedMx, reductSpeedMn;
798: PetscLogDouble flopSpeedMx, flopSpeedMn;
799: PetscLogDouble msgSpeedMx, msgSpeedMn;
800: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
802: MPI_Comm comm;
805: PetscObjectGetComm((PetscObject)viewer,&comm);
807: commarr_in[0] = time;
808: commarr_in[1] = -time;
809: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
810: timeMx = commarr_out[0];
811: timeMn = -commarr_out[1];
813: commarr_in[0] = time>=timeMx*0.001 ? perfInfo.flops/time : 0;
814: commarr_in[1] = time>=timeMx*0.001 ? perfInfo.numReductions/time : 0;
815: commarr_in[2] = time>=timeMx*0.001 ? perfInfo.messageLength/time : 0;
816: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
818: commarr_in[4] = time>=timeMx*0.001 ? -perfInfo.flops/time : -1e30;
819: commarr_in[5] = time>=timeMx*0.001 ? -perfInfo.numReductions/time : -1e30;
820: commarr_in[6] = time>=timeMx*0.001 ? -perfInfo.messageLength/time : -1e30;
821: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
823: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
825: flopSpeedMx = commarr_out[0];
826: reductSpeedMx = commarr_out[1];
827: msgSpeedMx = commarr_out[2];
828: countsPerCallMx = commarr_out[3];
830: flopSpeedMn = -commarr_out[4];
831: reductSpeedMn = -commarr_out[5];
832: msgSpeedMn = -commarr_out[6];
833: countsPerCallMn = -commarr_out[7];
835: *isPrinted = ((timeMx/totalTime) > (threshTime/100.0)) ? PETSC_TRUE : PETSC_FALSE;
836: if (isPrinted) {
837: PetscViewerXMLStartSection(viewer, "event", NULL);
838: PetscViewerXMLPutString(viewer, "name", NULL, name);
839: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
842: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
843: /* One call per parent */
844: } else {
845: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
846: }
847:
848: if (flopSpeedMx<0.01) {
849: /* NO flops: don't print */
850: } else {
851: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
852: }
853:
854: if (msgSpeedMx<0.01) {
855: /* NO msgs: don't print */
856: } else {
857: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
858: }
859:
860: if (reductSpeedMx<0.01) {
861: /* NO reductions: don't print */
862: } else {
863: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
864: }
865: }
866: return(0);
867: }
869: /* Count the number of times the parent event was called */
871: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
872: {
873: if (tree[i].depth<=1) {
874: return 1; /* Main event: only once */
875: } else if (!tree[i].own) {
876: return 1; /* This event didn't happen in this process, but did in another */
877: } else {
878: int iParent;
879: for (iParent=i-1; iParent>=0; iParent--) {
880: if (tree[iParent].depth == tree[i].depth-1) break;
881: }
882: if (tree[iParent].depth != tree[i].depth-1) {
883: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
884: return -2;
885: } else {
886: PetscLogEvent dftEvent = tree[iParent].dftEvent;
887: return eventPerfInfo[dftEvent].count;
888: }
889: }
890: }
892: typedef struct {
893: int id;
894: PetscLogDouble val;
895: } PetscSortItem;
897: static int compareSortItems(const void *item1_, const void *item2_)
898: {
899: PetscSortItem *item1 = (PetscSortItem *) item1_;
900: PetscSortItem *item2 = (PetscSortItem *) item2_;
901: if (item1->val > item2->val) return -1;
902: if (item1->val < item2->val) return +1;
903: return 0;
904: }
906: static PetscErrorCode PetscLogNestedPrint(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, int iStart, PetscLogDouble totalTime)
907: {
908: int depth = tree[iStart].depth;
909: const char *name;
910: int parentCount, nChildren;
911: PetscSortItem *children;
912: PetscErrorCode ierr;
913: PetscEventPerfInfo *eventPerfInfo;
914: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
915: PetscLogDouble countsPerCall;
916: PetscBool wasPrinted;
917: PetscBool childWasPrinted;
918: MPI_Comm comm;
920: {
921: /* Look up the name of the event and its PerfInfo */
922: const int stage=0;
923: PetscStageLog stageLog;
924: PetscEventRegInfo *eventRegInfo;
925: PetscLogGetStageLog(&stageLog);
926: eventRegInfo = stageLog->eventLog->eventInfo;
927: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
928: name = eventRegInfo[(PetscLogEvent) tree[iStart].nstEvent].name;
929: }
931: PetscObjectGetComm((PetscObject)viewer,&comm);
933: /* Count the number of child processes */
934: nChildren = 0;
935: {
936: int i;
937: for (i=iStart+1; i<nTimers; i++) {
938: if (tree[i].depth<=depth) break;
939: if (tree[i].depth == depth + 1) nChildren++;
940: }
941: }
943: if (nChildren>0) {
944: /* Create an array for the id-s and maxTimes of the children,
945: * leaving 2 spaces for self-time and other-time */
946: int i;
947: PetscLogDouble *times, *maxTimes;
949: PetscMalloc1(nChildren+2,&children);
950: nChildren = 0;
951: for (i=iStart+1; i<nTimers; i++) {
952: if (tree[i].depth<=depth) break;
953: if (tree[i].depth == depth + 1) {
954: children[nChildren].id = i;
955: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
956: nChildren++;
957: }
958: }
960: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
961: PetscMalloc1(nChildren,×);
962: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
964: PetscMalloc1(nChildren,&maxTimes);
965: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
966: PetscFree(times);
968: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
969: PetscFree(maxTimes);
970: }
972: if (!tree[iStart].own) {
973: /* Set values for a timer that was not activated in this process
974: * (but was, in other processes of this run) */
975: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
977: selfPerfInfo = myPerfInfo;
978: otherPerfInfo = myPerfInfo;
980: parentCount = 1;
981: countsPerCall = 0;
982: } else {
983: /* Set the values for a timer that was activated in this process */
984: int i;
985: PetscLogEvent dftEvent = tree[iStart].dftEvent;
987: parentCount = countParents( tree, eventPerfInfo, iStart);
988: myPerfInfo = eventPerfInfo[dftEvent];
989: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
991: selfPerfInfo = myPerfInfo;
992: otherPerfInfo.time = 0;
993: otherPerfInfo.flops = 0;
994: otherPerfInfo.numMessages = 0;
995: otherPerfInfo.messageLength = 0;
996: otherPerfInfo.numReductions = 0;
998: for (i=0; i<nChildren; i++) {
999: /* For all child counters: subtract the child values from self-timers */
1001: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1002: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1004: selfPerfInfo.time -= childPerfInfo.time;
1005: selfPerfInfo.flops -= childPerfInfo.flops;
1006: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1007: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1008: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1010: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1011: /* Add them to 'other' if the time is ignored in the output */
1012: otherPerfInfo.time += childPerfInfo.time;
1013: otherPerfInfo.flops += childPerfInfo.flops;
1014: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1015: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1016: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1017: }
1018: }
1019: }
1021: /* Main output for this timer */
1022: PetscLogPrintNestedLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1024: /* Now print the lines for the children */
1025: if (nChildren>0) {
1026: /* Calculate max-times for 'self' and 'other' */
1027: int i;
1028: PetscLogDouble times[2], maxTimes[2];
1029: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1030: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1031: children[nChildren+0].id = -1;
1032: children[nChildren+0].val = maxTimes[0];
1033: children[nChildren+1].id = -2;
1034: children[nChildren+1].val = maxTimes[1];
1036: /* Now sort the children (including 'self' and 'other') on total time */
1037: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1039: /* Print (or ignore) the children in ascending order of total time */
1040: PetscViewerXMLStartSection(viewer,"events", NULL);
1041: for (i=0; i<nChildren+2; i++) {
1042: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1043: /* ignored: no output */
1044: } else if (children[i].id==-1) {
1045: PetscLogPrintNestedLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1046: if (childWasPrinted) {
1047: PetscViewerXMLEndSection(viewer,"event");
1048: }
1049: } else if (children[i].id==-2) {
1050: size_t len;
1051: char *otherName;
1053: PetscStrlen(name,&len);
1054: PetscMalloc1(16+len,&otherName);
1055: sprintf(otherName,"%s: other-timed",name);
1056: PetscLogPrintNestedLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1057: PetscFree(otherName);
1058: if (childWasPrinted) {
1059: PetscViewerXMLEndSection(viewer,"event");
1060: }
1061: } else {
1062: /* Print the child with a recursive call to this function */
1063: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1064: }
1065: }
1066: PetscViewerXMLEndSection(viewer,"events");
1067: PetscFree(children);
1068: }
1070: if (wasPrinted) {
1071: PetscViewerXMLEndSection(viewer, "event");
1072: }
1073: return 0;
1074: }
1076: static PetscErrorCode PetscLogNestedPrintTop(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, PetscLogDouble totalTime)
1077: {
1078: int nChildren;
1079: PetscSortItem *children;
1080: PetscErrorCode ierr;
1081: PetscEventPerfInfo *eventPerfInfo;
1082: MPI_Comm comm;
1085: PetscObjectGetComm((PetscObject)viewer,&comm);
1086: {
1087: /* Look up the PerfInfo */
1088: const int stage=0;
1089: PetscStageLog stageLog;
1090: PetscLogGetStageLog(&stageLog);
1091: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1092: }
1094: /* Count the number of child processes, and count total time */
1095: nChildren = 0;
1096: {
1097: int i;
1098: for (i=0; i<nTimers; i++) {
1099: if (tree[i].depth==1) nChildren++;
1100: }
1101: }
1103: if (nChildren>0) {
1104: /* Create an array for the id-s and maxTimes of the children,
1105: * leaving 2 spaces for self-time and other-time */
1106: int i;
1107: PetscLogDouble *times, *maxTimes;
1109: PetscMalloc1(nChildren,&children);
1110: nChildren = 0;
1111: for (i=0; i<nTimers; i++) {
1112: if (tree[i].depth == 1) {
1113: children[nChildren].id = i;
1114: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1115: nChildren++;
1116: }
1117: }
1118:
1119: /* Calculate the children's maximum times, to sort them */
1120: PetscMalloc1(nChildren,×);
1121: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1123: PetscMalloc1(nChildren,&maxTimes);
1124: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1125: PetscFree(times);
1127: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1128: PetscFree(maxTimes);
1130: /* Now sort the children on total time */
1131: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1132: /* Print (or ignore) the children in ascending order of total time */
1133: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1134: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1135: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, threshTime, "%f");
1137: for (i=0; i<nChildren; i++) {
1138: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1139: /* ignored: no output */
1140: } else {
1141: /* Print the child with a recursive call to this function */
1142: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1143: }
1144: }
1145: PetscViewerXMLEndSection(viewer, "timertree");
1146: PetscFree(children);
1147: }
1148: return(0);
1149: }
1151: typedef struct {
1152: char *name;
1153: PetscLogDouble time;
1154: PetscLogDouble flops;
1155: PetscLogDouble numMessages;
1156: PetscLogDouble messageLength;
1157: PetscLogDouble numReductions;
1158: } PetscSelfTimer;
1160: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1161: {
1162: PetscErrorCode ierr;
1163: PetscEventPerfInfo *eventPerfInfo;
1164: PetscEventRegInfo *eventRegInfo;
1165: PetscSelfTimer *selftimes;
1166: PetscSelfTimer *totaltimes;
1167: NestedEventId *nstEvents;
1168: int i, maxDefaultTimer;
1169: NestedEventId nst;
1170: PetscLogEvent dft;
1171: int nstMax, nstMax_local;
1172: MPI_Comm comm;
1173:
1175: PetscObjectGetComm((PetscObject)viewer,&comm);
1176: {
1177: const int stage=0;
1178: PetscStageLog stageLog;
1179: PetscLogGetStageLog(&stageLog);
1180: eventRegInfo = stageLog->eventLog->eventInfo;
1181: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1182: }
1184: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1185: maxDefaultTimer =0;
1186: for (i=0; i<nNestedEvents; i++) {
1187: int nParents = nestedEvents[i].nParents;
1188: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1189: int j;
1190: for (j=0; j<nParents; j++) {
1191: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1192: }
1193: }
1194: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1195: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1196: for (i=0; i<nNestedEvents; i++) {
1197: int nParents = nestedEvents[i].nParents;
1198: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1199: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1200: int j;
1201: for (j=0; j<nParents; j++) { nstEvents[dftEvents[j]] = nstEvent; }
1202: }
1204: /* Calculate largest nested event-ID */
1205: nstMax_local = 0;
1206: for (i=0; i<nNestedEvents; i++) { if (nestedEvents[i].nstEvent>nstMax_local) {nstMax_local = nestedEvents[i].nstEvent;} }
1207: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1210: /* Initialize all total-times with zero */
1211: PetscMalloc1(nstMax+1,&selftimes);
1212: PetscMalloc1(nstMax+1,&totaltimes);
1213: for (nst=0; nst<=nstMax; nst++) {
1214: totaltimes[nst].time = 0;
1215: totaltimes[nst].flops = 0;
1216: totaltimes[nst].numMessages = 0;
1217: totaltimes[nst].messageLength = 0;
1218: totaltimes[nst].numReductions = 0;
1219: totaltimes[nst].name = NULL;
1220: }
1222: /* Calculate total-times */
1223: for (i=0; i<nNestedEvents; i++) {
1224: const int nParents = nestedEvents[i].nParents;
1225: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1226: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1227: int j;
1228: for (j=0; j<nParents; j++) {
1229: const PetscLogEvent dftEvent = dftEvents[j];
1230: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1231: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1232: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1233: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1234: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1235: }
1236: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1237: }
1239: /* Initialize: self-times := totaltimes */
1240: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1242: /* Subtract timed subprocesses from self-times */
1243: for (i=0; i<nNestedEvents; i++) {
1244: const int nParents = nestedEvents[i].nParents;
1245: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1246: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1247: int j;
1248: for (j=0; j<nParents; j++) {
1249: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1250: const PetscLogEvent dftEvent = dftEvents[j];
1251: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1252: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1253: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1254: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1255: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1256: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1257: }
1258: }
1259: }
1261: PetscFree(nstEvents);
1262: PetscFree(totaltimes);
1264: /* Set outputs */
1265: *p_self = selftimes;
1266: *p_nstMax = nstMax;
1267: return(0);
1268: }
1270: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1271: {
1272: PetscErrorCode ierr;
1273: int i;
1274: NestedEventId nst;
1275: PetscSortItem *sortSelfTimes;
1276: PetscLogDouble *times, *maxTimes;
1277: PetscEventRegInfo *eventRegInfo;
1278: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1279: PetscBool wasPrinted;
1280: MPI_Comm comm;
1283: PetscObjectGetComm((PetscObject)viewer,&comm);
1284: {
1285: PetscStageLog stageLog;
1286: PetscLogGetStageLog(&stageLog);
1287: eventRegInfo = stageLog->eventLog->eventInfo;
1288: }
1290: PetscMalloc1(nstMax+1,×);
1291: PetscMalloc1(nstMax+1,&maxTimes);
1292: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1293: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1294: PetscFree(times);
1296: PetscMalloc1(nstMax+1,&sortSelfTimes);
1298: /* Sort the self-timers on basis of the largest time needed */
1299: for (nst=0; nst<=nstMax; nst++) {
1300: sortSelfTimes[nst].id = nst;
1301: sortSelfTimes[nst].val = maxTimes[nst];
1302: }
1303: PetscFree(maxTimes);
1304: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1306: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1307: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1309: for (i=0; i<=nstMax; i++) {
1310: if ((sortSelfTimes[i].val/totalTime) >= (threshTime/100.0)) {
1311: NestedEventId nstEvent = sortSelfTimes[i].id;
1312: PetscEventPerfInfo selfPerfInfo;
1313: const char *name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1315: selfPerfInfo.time = selftimes[nstEvent].time ;
1316: selfPerfInfo.flops = selftimes[nstEvent].flops;
1317: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1318: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1319: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1320:
1321: PetscLogPrintNestedLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1322: if (wasPrinted){
1323: PetscViewerXMLEndSection(viewer, "event");
1324: }
1325: }
1326: }
1327: PetscViewerXMLEndSection(viewer, "selftimertable");
1328: PetscFree(sortSelfTimes);
1329: return(0);
1330: }
1332: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1333: {
1334: MPI_Comm comm;
1335: PetscErrorCode ierr;
1336: PetscLogDouble locTotalTime, globTotalTime;
1337: PetscNestedEventTree *tree = NULL;
1338: PetscSelfTimer *selftimers = NULL;
1339: int nTimers = 0, nstMax = 0;
1340: PetscViewerType vType;
1343: PetscViewerGetType(viewer,&vType);
1345: /* Set useXMLFormat that controls the format in all local PetscPrint.. functions */
1346: PetscViewerInitASCII_XML(viewer);
1348: PetscObjectGetComm((PetscObject)viewer,&comm);
1350: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1351: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1353: /* Get the total elapsed time, local and global maximum */
1354: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1355: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1357: /* Print global information about this run */
1358: PetscPrintExeSpecs(viewer);
1359: PetscPrintGlobalPerformance(viewer, locTotalTime);
1360:
1361: /* Collect nested timer tree info from all processes */
1362: PetscCreateLogTreeNested(viewer, &tree, &nTimers);
1363: PetscLogNestedPrintTop(viewer, tree, nTimers, globTotalTime);
1364: PetscLogFreeNestedTree(tree, nTimers);
1366: /* Calculate self-time for all (not-nested) events */
1367: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1368: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1369: PetscFree(selftimers);
1371: PetscViewerXMLEndSection(viewer, "petscroot");
1372: PetscViewerFinalASCII_XML(viewer);
1373: PetscLogNestedEnd();
1374: return(0);
1375: }
1377: #endif