Actual source code: xmllogevent.c
petsc-3.7.1 2016-05-15
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include xmllogevent.h
13: #include xmlviewer.h
15: #if defined(PETSC_USE_LOG)
17: /*
18: * Support for nested PetscTimers
19: *
20: * PetscTimers keep track of a lot of useful information: Wall clock times,
21: * message passing statistics, flop counts. Information about the nested structure
22: * of the timers is lost. Example:
23: *
24: * 7:30 Start: awake
25: * 7:30 Start: morning routine
26: * 7:40 Start: eat
27: * 7:49 Done: eat
28: * 7:43 Done: morning routine
29: * 8:15 Start: work
30: * 12:15 Start: eat
31: * 12:45 Done: eat
32: * 16:00 Done: work
33: * 16:30 Start: evening routine
34: * 18:30 Start: eat
35: * 19:15 Done: eat
36: * 22:00 Done: evening routine
37: * 22:00 Done: awake
38: *
39: * Petsc timers provide the following timer results:
40: *
41: * awake: 1 call 14:30 hours
42: * morning routine: 1 call 0:13 hours
43: * eat: 3 calls 1:24 hours
44: * work: 1 call 7:45 hours
45: * evening routine 1 call 5:30 hours
46: *
47: * Nested timers can be used to get the following table:
48: *
49: * [1 call]: awake 14:30 hours
50: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
51: * [1 call]: eat 0:09 hours (69 % of morning routine)
52: * rest (morning routine) 0:04 hours (31 % of morning routine)
53: * [1 call]: work 7:45 hours (53 % of awake)
54: * [1 call]: eat 0:30 hours ( 6 % of work)
55: * rest (work) 7:15 hours (94 % of work)
56: * [1 call]: evening routine 5:30 hours (38 % of awake)
57: * [1 call]: eat 0:45 hours (14 % of evening routine)
58: * rest (evening routine) 4:45 hours (86 % of morning routine)
59: *
60: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
61: * the nested timers make the stages unnecessary.
62: *
63: */
65: /*
66: * Data structures for keeping track of nested timers:
67: *
68: * nestedEvents: information about the timers that have actually been activated
69: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
70: *
71: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
72: * (one or more) default timers, where one of the default timers has the same event-id as the
73: * nested one.
74: *
75: * Because of the risk of confusion between nested timer ids and default timer ids, we
76: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
77: * only for default events. Also, all nested event variables are prepended with 'nst', and
78: * default timers with 'dft'.
79: */
80: typedef PetscLogEvent NestedEventId;
81: typedef struct {
82: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
83: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
84: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
85: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
87: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
88: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
89: } PetscNestedEvent;
91: static PetscLogEvent dftParentActive = 0;
92: static int nNestedEvents = 0;
93: static int nNestedEventsAllocated = 0;
94: static PetscNestedEvent *nestedEvents = NULL;
95: static PetscLogDouble threshTime = 0.01; /* initial value was .1 */
97: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
98: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
102: PetscErrorCode PetscLogNestedBegin(void)
103: {
104: PetscErrorCode ierr;
106: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
108: nNestedEventsAllocated=10;
109: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
112: dftParentActive = 0;
113: nNestedEvents =1;
115: /* 'Awake' is nested event 0. It has no parents */
116: nestedEvents[0].nstEvent = 0;
117: nestedEvents[0].nParents = 0;
118: nestedEvents[0].dftParentsSorted = NULL;
119: nestedEvents[0].dftEvents = NULL;
120: nestedEvents[0].dftParents = NULL;
121: nestedEvents[0].dftEventsSorted = NULL;
123: PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested);
124: return(0);
125: }
127: /* Delete the data structures for the nested timers */
130: PetscErrorCode PetscLogNestedEnd(void)
131: {
132: PetscErrorCode ierr;
133: int i;
136: if (!nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents does not exist");
138: for (i=0; i<nNestedEvents; i++) {
139: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
140: }
141: PetscFree(nestedEvents);
142: nestedEvents = NULL;
143: nNestedEvents = 0;
144: nNestedEventsAllocated = 0;
145: return(0);
146: }
149: /*
150: * UTILITIES: FIND STUFF IN SORTED ARRAYS
151: *
152: * Utility: find a default timer in a sorted array */
155: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
156: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
157: int narray, /* dimension of dftArray */
158: int *entry) /* entry in the array where dftIndex may be found;
159: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
160: * In that case, the dftIndex can be inserted at this entry. */
161: {
163: if (narray==0 || dftIndex <= dftArray[0]) {
164: *entry = 0;
165: } else if (dftIndex > dftArray[narray-1]) {
166: *entry = narray;
167: } else {
168: int ihigh=narray-1, ilow=0;
169: while (ihigh>ilow) {
170: const int imiddle = (ihigh+ilow)/2;
171: if (dftArray[imiddle] > dftIndex) {
172: ihigh=imiddle;
173: } else if (dftArray[imiddle]<dftIndex) {
174: ilow =imiddle+1;
175: } else {
176: ihigh=imiddle;
177: ilow =imiddle;
178: }
179: }
180: *entry = ihigh;
181: }
182: return(0);
183: }
185: /* Utility: find the nested event with given identification */
188: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
189: int *entry) /* entry in the nestedEvents where nstEvent may be found;
190: if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray */
191: {
194: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
195: *entry = 0;
196: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
197: *entry = nNestedEvents;
198: } else {
199: int ihigh=nNestedEvents-1, ilow=0;
200: while (ihigh>ilow) {
201: const int imiddle = (ihigh+ilow)/2;
202: if (nestedEvents[imiddle].nstEvent > nstEvent) {
203: ihigh=imiddle;
204: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
205: ilow =imiddle+1;
206: } else {
207: ihigh=imiddle;
208: ilow =imiddle;
209: }
210: }
211: *entry = ihigh;
212: }
213: return(0);
214: }
216: /******************************************************************************************/
217: /* Start a nested event */
220: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
221: {
222: PetscErrorCode ierr;
223: int entry, pentry, tentry,i;
224: PetscLogEvent dftEvent;
227: PetscLogEventFindNestedTimer(nstEvent, &entry);
228: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
229: /* Nested event doesn't exist yet: create it */
231: if (nNestedEvents==nNestedEventsAllocated) {
232: /* Enlarge and re-allocate nestedEvents if needed */
233: PetscNestedEvent *tmp = nestedEvents;
234: PetscMalloc1(2*nNestedEvents,&nestedEvents);
235: nNestedEventsAllocated*=2;
236: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
237: PetscFree(tmp);
238: }
240: /* Clear space in nestedEvents for new nested event */
241: nNestedEvents++;
242: for (i = nNestedEvents-1; i>entry; i--) {
243: nestedEvents[i] = nestedEvents[i-1];
244: }
246: /* Create event in nestedEvents */
247: nestedEvents[entry].nstEvent = nstEvent;
248: nestedEvents[entry].nParents=1;
249: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
251: /* Fill in new event */
252: pentry = 0;
253: dftEvent = (PetscLogEvent) nstEvent;
255: nestedEvents[entry].nstEvent = nstEvent;
256: nestedEvents[entry].dftParents[pentry] = dftParentActive;
257: nestedEvents[entry].dftEvents[pentry] = dftEvent;
258: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
259: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
261: } else {
262: /* Nested event exists: find current dftParentActive among parents */
263: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
264: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
265: int nParents = nestedEvents[entry].nParents;
267: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
269: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
270: /* dftParentActive not in the list: add it to the list */
271: int i;
272: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
273: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
274: char name[100];
276: /* Register a new default timer */
277: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
278: PetscLogEventRegister(name, 0, &dftEvent);
279: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
281: /* Reallocate parents and dftEvents to make space for new parent */
282: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
283: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
284: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
285: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
286: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
287: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
289: dftParents = nestedEvents[entry].dftParents;
290: dftEvents = nestedEvents[entry].dftEvents;
291: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
292: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
294: nestedEvents[entry].nParents++;
295: nParents++;
297: for (i = nParents-1; i>pentry; i--) {
298: dftParentsSorted[i] = dftParentsSorted[i-1];
299: dftEvents[i] = dftEvents[i-1];
300: }
301: for (i = nParents-1; i>tentry; i--) {
302: dftParents[i] = dftParents[i-1];
303: dftEventsSorted[i] = dftEventsSorted[i-1];
304: }
306: /* Fill in the new default timer */
307: dftParentsSorted[pentry] = dftParentActive;
308: dftEvents[pentry] = dftEvent;
309: dftParents[tentry] = dftParentActive;
310: dftEventsSorted[tentry] = dftEvent;
312: } else {
313: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
314: dftEvent = nestedEvents[entry].dftEvents[pentry];
315: }
316: }
318: /* Start the default 'dftEvent'-timer and update the dftParentActive */
319: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
320: dftParentActive = dftEvent;
321: return(0);
322: }
324: /* End a nested event */
327: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
328: {
329: PetscErrorCode ierr;
330: int entry, pentry, nParents;
331: PetscLogEvent *dftEventsSorted;
334: /* Find the nested event */
335: PetscLogEventFindNestedTimer(nstEvent, &entry);
336: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs",nstEvent);
337: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
338: nParents = nestedEvents[entry].nParents;
340: /* Find the current default timer among the 'dftEvents' of this event */
341: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
343: if (pentry>=nParents || dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
345: /* Stop the default timer and update the dftParentActive */
346: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
347: dftParentActive = nestedEvents[entry].dftParents[pentry];
348: return(0);
349: }
351: /* Set the threshold time for logging the events
352: */
355: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
356: {
358: *oldThresh = threshTime;
359: threshTime = newThresh;
360: return(0);
361: }
365: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
366: {
367: PetscErrorCode ierr;
368: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
369: char version[256], buildoptions[128];
370: PetscMPIInt size;
371: MPI_Comm comm;
372: size_t len;
373:
375: PetscObjectGetComm((PetscObject)viewer,&comm);
376: PetscGetArchType(arch,sizeof(arch));
377: PetscGetHostName(hostname,sizeof(hostname));
378: PetscGetUserName(username,sizeof(username));
379: PetscGetProgramName(pname,sizeof(pname));
380: PetscGetDate(date,sizeof(date));
381: PetscGetVersion(version,sizeof(version));
382: MPI_Comm_size(comm, &size);
384: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
385: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
386: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
387: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
388: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
389: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
390: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
391: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
392: #if defined(PETSC_USE_DEBUG)
393: # if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
394: sprintf(buildoptions, "Debug, ComplexC++Kernels");
395: # else
396: sprintf(buildoptions, "Debug");
397: # endif
398: #else
399: # if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
400: sprintf(buildoptions, "ComplexC++Kernels");
401: # endif
402: #endif
403: PetscStrlen(buildoptions,&len);
404: if (len) {
405: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
406: }
407: PetscViewerXMLEndSection(viewer, "runspecification");
408: return(0);
409: }
411: /* Print the global performance: max, max/min, average and total of
412: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
413: */
416: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
417: {
421: PetscViewerXMLStartSection(viewer, name, desc);
422: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
423: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
424: if (avg>-1.0) {
425: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
426: }
427: if (tot>-1.0) {
428: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
429: }
430: PetscViewerXMLEndSection(viewer, name);
431: return(0);
432: }
434: /* Print the global performance: max, max/min, average and total of
435: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
436: */
439: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
440: {
441: PetscErrorCode ierr;
442: PetscLogDouble min, max, tot, ratio, avg;
443: PetscLogDouble flops, mem, red, mess;
444: PetscMPIInt size;
445: MPI_Comm comm;
448: PetscObjectGetComm((PetscObject)viewer,&comm);
449: MPI_Comm_size(comm, &size);
451: /* Must preserve reduction count before we go on */
452: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
454: /* Calculate summary information */
455: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
457: /* Time */
458: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
459: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
460: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
461: avg = (tot)/((PetscLogDouble) size);
462: if (min != 0.0) ratio = max/min;
463: else ratio = 0.0;
464: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
466: /* Objects */
467: avg = (PetscLogDouble) petsc_numObjects;
468: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
469: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
470: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
471: avg = (tot)/((PetscLogDouble) size);
472: if (min != 0.0) ratio = max/min;
473: else ratio = 0.0;
474: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
476: /* Flop */
477: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
478: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
479: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
480: avg = (tot)/((PetscLogDouble) size);
481: if (min != 0.0) ratio = max/min;
482: else ratio = 0.0;
483: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
485: /* Flop/sec -- Must talk to Barry here */
486: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
487: else flops = 0.0;
488: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
489: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
490: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
491: avg = (tot)/((PetscLogDouble) size);
492: if (min != 0.0) ratio = max/min;
493: else ratio = 0.0;
494: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
496: /* Memory */
497: PetscMallocGetMaximumUsage(&mem);
498: if (mem > 0.0) {
499: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
500: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
501: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
502: avg = (tot)/((PetscLogDouble) size);
503: if (min != 0.0) ratio = max/min;
504: else ratio = 0.0;
505: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
506: }
507: /* Messages */
508: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
509: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
510: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
511: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
512: avg = (tot)/((PetscLogDouble) size);
513: if (min != 0.0) ratio = max/min;
514: else ratio = 0.0;
515: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
517: /* Message Volume */
518: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
519: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
520: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
521: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
522: avg = (tot)/((PetscLogDouble) size);
523: if (min != 0.0) ratio = max/min;
524: else ratio = 0.0;
525: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
527: /* Reductions */
528: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
529: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
530: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
531: if (min != 0.0) ratio = max/min;
532: else ratio = 0.0;
533: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
534: PetscViewerXMLEndSection(viewer, "globalperformance");
535: return(0);
536: }
538: typedef struct {
539: PetscLogEvent dftEvent;
540: NestedEventId nstEvent;
541: PetscLogEvent dftParent;
542: NestedEventId nstParent;
543: PetscBool own;
544: int depth;
545: NestedEventId* nstPath;
546: } PetscNestedEventTree;
548: /* Compare timers to sort them in the tree */
549: static int compareTreeItems(const void *item1_, const void *item2_)
550: {
551: int i;
552: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
553: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
554: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
555: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
556: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
557: }
558: if (item1->depth < item2->depth) return -1;
559: if (item1->depth > item2->depth) return 1;
560: return 0;
561: }
562: /*
563: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
564: * calls that happen in some processes, but not in others.
565: *
566: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
567: * The tree is sorted so that the timers can be printed in the order of appearance.
568: *
569: * For tree-items which appear in the trees of multiple processes (which will be most items), the
570: * following rule is followed:
571: * + if information from my own process is available, then that is the information stored in tree.
572: * otherwise it is some other process's information.
573: */
576: static PetscErrorCode PetscCreateLogTreeNested(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
577: {
578: PetscNestedEventTree *tree = NULL, *newTree;
579: int *treeIndices;
580: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
581: int yesno;
582: PetscBool done;
583: PetscErrorCode ierr;
584: int maxdepth;
585: int depth;
586: int illegalEvent;
587: int iextra;
588: PetscStageLog stageLog;
589: NestedEventId *nstPath, *nstMyPath;
590: MPI_Comm comm;
593: PetscObjectGetComm((PetscObject)viewer,&comm);
594: PetscLogGetStageLog(&stageLog);
596: /* Calculate memory needed to store everybody's information and allocate tree */
597: nTimers = 0;
598: for (i=0; i<nNestedEvents; i++) nTimers+=nestedEvents[i].nParents;
600: PetscMalloc1(nTimers,&tree);
602: /* Fill tree with readily available information */
603: iTimer0 = 0;
604: maxDefaultTimer =0;
605: for (i=0; i<nNestedEvents; i++) {
606: int nParents = nestedEvents[i].nParents;
607: NestedEventId nstEvent = nestedEvents[i].nstEvent;
608: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
609: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
610: for (j=0; j<nParents; j++) {
611: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
613: tree[iTimer0+j].dftEvent = dftEvents[j];
614: tree[iTimer0+j].nstEvent = nstEvent;
615: tree[iTimer0+j].dftParent = dftParentsSorted[j];
616: tree[iTimer0+j].own = PETSC_TRUE;
618: tree[iTimer0+j].nstParent = 0;
619: tree[iTimer0+j].depth = 0;
620: tree[iTimer0+j].nstPath = NULL;
621: }
622: iTimer0 += nParents;
623: }
625: /* Calculate the global maximum for the default timer index, so array treeIndices can
626: * be allocated only once */
627: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
628: maxDefaultTimer = j;
630: /* Find default timer's place in the tree */
631: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
632: treeIndices[0] = 0;
633: for (i=1; i<nTimers; i++) {
634: PetscLogEvent dftEvent = tree[i].dftEvent;
635: treeIndices[dftEvent] = i;
636: }
638: /* Find each dftParent's nested identification */
639: for (i=0; i<nTimers; i++) {
640: PetscLogEvent dftParent = tree[i].dftParent;
641: if (dftParent) {
642: int j = treeIndices[dftParent];
643: tree[i].nstParent = tree[j].nstEvent;
644: }
645: }
647: /* Find depths for each timer path */
648: done = PETSC_FALSE;
649: maxdepth = 0;
650: tree[0].depth=1;
651: while (!done) {
652: done = PETSC_TRUE;
653: for (i=1; i<nTimers; i++) {
654: int j = treeIndices[tree[i].dftParent];
655: if (j==0) {
656: tree[i].depth=1;
657: } else if (tree[i].dftEvent!=0) {
658: depth = 1+tree[j].depth;
659: if (depth>tree[i].depth) {
660: done = PETSC_FALSE;
661: tree[i].depth = depth;
662: maxdepth = PetscMax(depth,maxdepth);
663: }
664: }
665: }
666: }
668: /* Allocate the paths in the entire tree */
669: for (i=0; i<nTimers; i++) {
670: depth = tree[i].depth;
671: PetscCalloc1(depth,&tree[i].nstPath);
672: }
674: /* Calculate the paths for all timers */
675: for (depth=1; depth<=maxdepth; depth++) {
676: for (i=0; i<nTimers; i++) {
677: if (tree[i].depth==depth) {
678: if (depth>1) {
679: int j = treeIndices[tree[i].dftParent];
680: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
681: }
682: tree[i].nstPath[depth-1] = tree[i].nstEvent;
683: }
684: }
685: }
686: PetscFree(treeIndices);
688: /* Sort the tree on basis of the paths */
689: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
691: /* Allocate an array to store paths */
692: depth = maxdepth;
693: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
694: PetscMalloc1(maxdepth+2, &nstPath);
695: PetscMalloc1(maxdepth+2, &nstMyPath);
697: /* Find an illegal nested event index (1+largest nested event index) */
698: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
699: i = illegalEvent;
700: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
702: /* First, detect timers which are not available in this process, but are available in others
703: * Allocate a new tree, that can contain all timers
704: * Then, fill the new tree with all (own and not-own) timers */
705: newTree= NULL;
706: for (yesno=0; yesno<=1; yesno++) {
707: depth = 1;
708: i = 0;
709: iextra = 0;
710: while (depth>0) {
711: int j;
712: PetscBool same;
714: /* Construct the next path in this process's tree:
715: * if necessary, supplement with invalid path entries */
716: depth++;
717: if (depth > maxdepth+1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth + 1 %d",depth,maxdepth+1);
718: if (i<nTimers) {
719: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
720: for (j=tree[i].depth; j<=depth; j++) nstMyPath[j] = illegalEvent;
721: } else {
722: for (j=0; j<=depth; j++) nstMyPath[j] = illegalEvent;
723: }
724:
725: /* Communicate with other processes to obtain the next path and its depth */
726: MPIU_Allreduce(nstMyPath, nstPath, depth+1, MPI_INT, MPI_MIN, comm);
727: for (j=depth-1; (int) j>=0; j--) {
728: if (nstPath[j]==illegalEvent) depth=j;
729: }
730:
731: if (depth>0) {
732: /* If the path exists */
733:
734: /* check whether the next path is the same as this process's next path */
735: same = PETSC_TRUE;
736: for (j=0; same && j<=depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
737:
738: if (same) {
739: /* Register 'own path' */
740: if (newTree) newTree[i+iextra] = tree[i];
741: i++;
742: } else {
743: /* Register 'not an own path' */
744: if (newTree) {
745: newTree[i+iextra].nstEvent = nstPath[depth-1];
746: newTree[i+iextra].own = PETSC_FALSE;
747: newTree[i+iextra].depth = depth;
748: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
749: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
750:
751: newTree[i+iextra].dftEvent = 0;
752: newTree[i+iextra].dftParent = 0;
753: newTree[i+iextra].nstParent = 0;
754: }
755: iextra++;
756: }
757:
758: }
759: }
761: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
762: totalNTimers = nTimers + iextra;
763: if (!newTree) {
764: PetscMalloc1(totalNTimers, &newTree);
765: }
766: }
767: PetscFree(nstPath);
768: PetscFree(nstMyPath);
769: PetscFree(tree);
770: tree = newTree;
771: newTree = NULL;
773: /* Set return value and return */
774: *p_tree = tree;
775: *p_nTimers = totalNTimers;
776: return(0);
777: }
779: /*
780: * Delete the nested timer tree
781: */
784: static PetscErrorCode PetscLogFreeNestedTree(PetscNestedEventTree *tree, int nTimers)
785: {
786: int i;
787: PetscErrorCode ierr;
788:
790: for (i=0; i<nTimers; i++) {
791: PetscFree(tree[i].nstPath);
792: }
793: PetscFree(tree);
794: return(0);
795: }
797: /* Print the global performance: max, max/min, average and total of
798: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
799: */
802: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
803: {
807: PetscViewerXMLStartSection(viewer, name, NULL);
808: if (maxvalue>minvalue*minmaxtreshold) {
809: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
810: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
811: } else {
812: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
813: };
814: PetscViewerXMLEndSection(viewer, name);
815: return(0);
816: }
818: #define N_COMM 8
821: static PetscErrorCode PetscLogPrintNestedLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
822: {
823: PetscLogDouble time = perfInfo.time;
824: PetscLogDouble timeMx, timeMn;
825: PetscLogDouble countsPerCallMx, countsPerCallMn;
826: PetscLogDouble reductSpeedMx, reductSpeedMn;
827: PetscLogDouble flopSpeedMx, flopSpeedMn;
828: PetscLogDouble msgSpeedMx, msgSpeedMn;
829: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
831: MPI_Comm comm;
834: PetscObjectGetComm((PetscObject)viewer,&comm);
836: commarr_in[0] = time;
837: commarr_in[1] = -time;
838: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
839: timeMx = commarr_out[0];
840: timeMn = -commarr_out[1];
842: commarr_in[0] = time>0.0 ? perfInfo.flops/time : 0;
843: commarr_in[1] = time>0.0 ? perfInfo.numReductions/time : 0;
844: commarr_in[2] = time>0.0 ? perfInfo.messageLength/time : 0;
845: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
847: commarr_in[4] = time>0.0 ? -perfInfo.flops/time : -1e30;
848: commarr_in[5] = time>0.0 ? -perfInfo.numReductions/time : -1e30;
849: commarr_in[6] = time>0.0 ? -perfInfo.messageLength/time : -1e30;
850: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
852: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
854: flopSpeedMx = commarr_out[0];
855: reductSpeedMx = commarr_out[1];
856: msgSpeedMx = commarr_out[2];
857: countsPerCallMx = commarr_out[3];
859: flopSpeedMn = -commarr_out[4];
860: reductSpeedMn = -commarr_out[5];
861: msgSpeedMn = -commarr_out[6];
862: countsPerCallMn = -commarr_out[7];
864: *isPrinted = ((timeMx/totalTime) > (threshTime/100.0)) ? PETSC_TRUE : PETSC_FALSE;
865: if (isPrinted) {
866: PetscViewerXMLStartSection(viewer, "event", NULL);
867: PetscViewerXMLPutString(viewer, "name", NULL, name);
868: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
871: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
872: /* One call per parent */
873: } else {
874: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
875: }
876:
877: if (flopSpeedMx<0.01) {
878: /* NO flops: don't print */
879: } else {
880: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
881: }
882:
883: if (msgSpeedMx<0.01) {
884: /* NO msgs: don't print */
885: } else {
886: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
887: }
888:
889: if (reductSpeedMx<0.01) {
890: /* NO reductions: don't print */
891: } else {
892: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
893: }
894: }
895: return(0);
896: }
898: /* Count the number of times the parent event was called */
900: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
901: {
902: if (tree[i].depth<=1) {
903: return 1; /* Main event: only once */
904: } else if (!tree[i].own) {
905: return 1; /* This event didn't happen in this process, but did in another */
906: } else {
907: int iParent;
908: for (iParent=i-1; iParent>=0; iParent--) {
909: if (tree[iParent].depth == tree[i].depth-1) break;
910: }
911: if (tree[iParent].depth != tree[i].depth-1) {
912: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
913: return -2;
914: } else {
915: PetscLogEvent dftEvent = tree[iParent].dftEvent;
916: return eventPerfInfo[dftEvent].count;
917: }
918: }
919: }
921: typedef struct {
922: int id;
923: PetscLogDouble val;
924: } PetscSortItem;
926: static int compareSortItems(const void *item1_, const void *item2_)
927: {
928: PetscSortItem *item1 = (PetscSortItem *) item1_;
929: PetscSortItem *item2 = (PetscSortItem *) item2_;
930: if (item1->val > item2->val) return -1;
931: if (item1->val < item2->val) return +1;
932: return 0;
933: }
935: static PetscErrorCode PetscLogNestedPrint(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, int iStart, PetscLogDouble totalTime)
936: {
937: int depth = tree[iStart].depth;
938: const char *name;
939: int parentCount, nChildren;
940: PetscSortItem *children;
941: PetscErrorCode ierr;
942: PetscEventPerfInfo *eventPerfInfo;
943: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
944: PetscLogDouble countsPerCall;
945: PetscBool wasPrinted;
946: PetscBool childWasPrinted;
947: MPI_Comm comm;
949: {
950: /* Look up the name of the event and its PerfInfo */
951: const int stage=0;
952: PetscStageLog stageLog;
953: PetscEventRegInfo *eventRegInfo;
954: PetscLogGetStageLog(&stageLog);
955: eventRegInfo = stageLog->eventLog->eventInfo;
956: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
957: name = eventRegInfo[(PetscLogEvent) tree[iStart].nstEvent].name;
958: }
960: PetscObjectGetComm((PetscObject)viewer,&comm);
962: /* Count the number of child processes */
963: nChildren = 0;
964: {
965: int i;
966: for (i=iStart+1; i<nTimers; i++) {
967: if (tree[i].depth<=depth) break;
968: if (tree[i].depth == depth + 1) nChildren++;
969: }
970: }
972: if (nChildren>0) {
973: /* Create an array for the id-s and maxTimes of the children,
974: * leaving 2 spaces for self-time and other-time */
975: int i;
976: PetscLogDouble *times, *maxTimes;
978: PetscMalloc1(nChildren+2,&children);
979: nChildren = 0;
980: for (i=iStart+1; i<nTimers; i++) {
981: if (tree[i].depth<=depth) break;
982: if (tree[i].depth == depth + 1) {
983: children[nChildren].id = i;
984: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
985: nChildren++;
986: }
987: }
989: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
990: PetscMalloc1(nChildren,×);
991: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
993: PetscMalloc1(nChildren,&maxTimes);
994: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
995: PetscFree(times);
997: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
998: PetscFree(maxTimes);
999: }
1001: if (!tree[iStart].own) {
1002: /* Set values for a timer that was not activated in this process
1003: * (but was, in other processes of this run) */
1004: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1006: selfPerfInfo = myPerfInfo;
1007: otherPerfInfo = myPerfInfo;
1009: parentCount = 1;
1010: countsPerCall = 0;
1011: } else {
1012: /* Set the values for a timer that was activated in this process */
1013: int i;
1014: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1016: parentCount = countParents( tree, eventPerfInfo, iStart);
1017: myPerfInfo = eventPerfInfo[dftEvent];
1018: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1020: selfPerfInfo = myPerfInfo;
1021: otherPerfInfo.time = 0;
1022: otherPerfInfo.flops = 0;
1023: otherPerfInfo.numMessages = 0;
1024: otherPerfInfo.messageLength = 0;
1025: otherPerfInfo.numReductions = 0;
1027: for (i=0; i<nChildren; i++) {
1028: /* For all child counters: subtract the child values from self-timers */
1030: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1031: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1033: selfPerfInfo.time -= childPerfInfo.time;
1034: selfPerfInfo.flops -= childPerfInfo.flops;
1035: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1036: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1037: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1039: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1040: /* Add them to 'other' if the time is ignored in the output */
1041: otherPerfInfo.time += childPerfInfo.time;
1042: otherPerfInfo.flops += childPerfInfo.flops;
1043: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1044: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1045: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1046: }
1047: }
1048: }
1050: /* Main output for this timer */
1051: PetscLogPrintNestedLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1053: /* Now print the lines for the children */
1054: if (nChildren>0) {
1055: /* Calculate max-times for 'self' and 'other' */
1056: int i;
1057: PetscLogDouble times[2], maxTimes[2];
1058: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1059: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1060: children[nChildren+0].id = -1;
1061: children[nChildren+0].val = maxTimes[0];
1062: children[nChildren+1].id = -2;
1063: children[nChildren+1].val = maxTimes[1];
1065: /* Now sort the children (including 'self' and 'other') on total time */
1066: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1068: /* Print (or ignore) the children in ascending order of total time */
1069: PetscViewerXMLStartSection(viewer,"events", NULL);
1070: for (i=0; i<nChildren+2; i++) {
1071: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1072: /* ignored: no output */
1073: } else if (children[i].id==-1) {
1074: PetscLogPrintNestedLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1075: if (childWasPrinted) {
1076: PetscViewerXMLEndSection(viewer,"event");
1077: }
1078: } else if (children[i].id==-2) {
1079: size_t len;
1080: char *otherName;
1082: PetscStrlen(name,&len);
1083: PetscMalloc1(16+len,&otherName);
1084: sprintf(otherName,"%s: other-timed",name);
1085: PetscLogPrintNestedLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1086: PetscFree(otherName);
1087: if (childWasPrinted) {
1088: PetscViewerXMLEndSection(viewer,"event");
1089: }
1090: } else {
1091: /* Print the child with a recursive call to this function */
1092: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1093: }
1094: }
1095: PetscViewerXMLEndSection(viewer,"events");
1096: PetscFree(children);
1097: }
1099: if (wasPrinted) {
1100: PetscViewerXMLEndSection(viewer, "event");
1101: }
1102: return 0;
1103: }
1107: static PetscErrorCode PetscLogNestedPrintTop(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, PetscLogDouble totalTime)
1108: {
1109: int nChildren;
1110: PetscSortItem *children;
1111: PetscErrorCode ierr;
1112: PetscEventPerfInfo *eventPerfInfo;
1113: MPI_Comm comm;
1116: PetscObjectGetComm((PetscObject)viewer,&comm);
1117: {
1118: /* Look up the PerfInfo */
1119: const int stage=0;
1120: PetscStageLog stageLog;
1121: PetscLogGetStageLog(&stageLog);
1122: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1123: }
1125: /* Count the number of child processes, and count total time */
1126: nChildren = 0;
1127: {
1128: int i;
1129: for (i=0; i<nTimers; i++) {
1130: if (tree[i].depth==1) nChildren++;
1131: }
1132: }
1134: if (nChildren>0) {
1135: /* Create an array for the id-s and maxTimes of the children,
1136: * leaving 2 spaces for self-time and other-time */
1137: int i;
1138: PetscLogDouble *times, *maxTimes;
1140: PetscMalloc1(nChildren,&children);
1141: nChildren = 0;
1142: for (i=0; i<nTimers; i++) {
1143: if (tree[i].depth == 1) {
1144: children[nChildren].id = i;
1145: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1146: nChildren++;
1147: }
1148: }
1149:
1150: /* Calculate the children's maximum times, to sort them */
1151: PetscMalloc1(nChildren,×);
1152: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1154: PetscMalloc1(nChildren,&maxTimes);
1155: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1156: PetscFree(times);
1158: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1159: PetscFree(maxTimes);
1161: /* Now sort the children on total time */
1162: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1163: /* Print (or ignore) the children in ascending order of total time */
1164: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1165: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1166: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, threshTime, "%f");
1168: for (i=0; i<nChildren; i++) {
1169: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1170: /* ignored: no output */
1171: } else {
1172: /* Print the child with a recursive call to this function */
1173: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1174: }
1175: }
1176: PetscViewerXMLEndSection(viewer, "timertree");
1177: PetscFree(children);
1178: }
1179: return(0);
1180: }
1182: typedef struct {
1183: char *name;
1184: PetscLogDouble time;
1185: PetscLogDouble flops;
1186: PetscLogDouble numMessages;
1187: PetscLogDouble messageLength;
1188: PetscLogDouble numReductions;
1189: } PetscSelfTimer;
1193: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1194: {
1195: PetscErrorCode ierr;
1196: PetscEventPerfInfo *eventPerfInfo;
1197: PetscEventRegInfo *eventRegInfo;
1198: PetscSelfTimer *selftimes;
1199: PetscSelfTimer *totaltimes;
1200: NestedEventId *nstEvents;
1201: int i, maxDefaultTimer;
1202: NestedEventId nst;
1203: PetscLogEvent dft;
1204: int nstMax, nstMax_local;
1205: MPI_Comm comm;
1206:
1208: PetscObjectGetComm((PetscObject)viewer,&comm);
1209: {
1210: const int stage=0;
1211: PetscStageLog stageLog;
1212: PetscLogGetStageLog(&stageLog);
1213: eventRegInfo = stageLog->eventLog->eventInfo;
1214: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1215: }
1217: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1218: maxDefaultTimer =0;
1219: for (i=0; i<nNestedEvents; i++) {
1220: int nParents = nestedEvents[i].nParents;
1221: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1222: int j;
1223: for (j=0; j<nParents; j++) {
1224: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1225: }
1226: }
1227: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1228: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1229: for (i=0; i<nNestedEvents; i++) {
1230: int nParents = nestedEvents[i].nParents;
1231: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1232: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1233: int j;
1234: for (j=0; j<nParents; j++) { nstEvents[dftEvents[j]] = nstEvent; }
1235: }
1237: /* Calculate largest nested event-ID */
1238: nstMax_local = 0;
1239: for (i=0; i<nNestedEvents; i++) { if (nestedEvents[i].nstEvent>nstMax_local) {nstMax_local = nestedEvents[i].nstEvent;} }
1240: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1243: /* Initialize all total-times with zero */
1244: PetscMalloc1(nstMax+1,&selftimes);
1245: PetscMalloc1(nstMax+1,&totaltimes);
1246: for (nst=0; nst<=nstMax; nst++) {
1247: totaltimes[nst].time = 0;
1248: totaltimes[nst].flops = 0;
1249: totaltimes[nst].numMessages = 0;
1250: totaltimes[nst].messageLength = 0;
1251: totaltimes[nst].numReductions = 0;
1252: totaltimes[nst].name = NULL;
1253: }
1255: /* Calculate total-times */
1256: for (i=0; i<nNestedEvents; i++) {
1257: const int nParents = nestedEvents[i].nParents;
1258: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1259: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1260: int j;
1261: for (j=0; j<nParents; j++) {
1262: const PetscLogEvent dftEvent = dftEvents[j];
1263: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1264: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1265: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1266: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1267: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1268: }
1269: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1270: }
1272: /* Initialize: self-times := totaltimes */
1273: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1275: /* Subtract timed supprocesses from self-times */
1276: for (i=0; i<nNestedEvents; i++) {
1277: const int nParents = nestedEvents[i].nParents;
1278: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1279: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1280: int j;
1281: for (j=0; j<nParents; j++) {
1282: const PetscLogEvent dftEvent = dftEvents[j];
1283: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1284: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1285: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1286: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1287: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1288: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1289: }
1290: }
1292: PetscFree(nstEvents);
1293: PetscFree(totaltimes);
1295: /* Set outputs */
1296: *p_self = selftimes;
1297: *p_nstMax = nstMax;
1298: return(0);
1299: }
1303: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1304: {
1305: PetscErrorCode ierr;
1306: int i;
1307: NestedEventId nst;
1308: PetscSortItem *sortSelfTimes;
1309: PetscLogDouble *times, *maxTimes;
1310: PetscEventRegInfo *eventRegInfo;
1311: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1312: PetscBool wasPrinted;
1313: MPI_Comm comm;
1316: PetscObjectGetComm((PetscObject)viewer,&comm);
1317: {
1318: PetscStageLog stageLog;
1319: PetscLogGetStageLog(&stageLog);
1320: eventRegInfo = stageLog->eventLog->eventInfo;
1321: }
1323: PetscMalloc1(nstMax+1,×);
1324: PetscMalloc1(nstMax+1,&maxTimes);
1325: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1326: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1327: PetscFree(times);
1329: PetscMalloc1(nstMax+1,&sortSelfTimes);
1331: /* Sort the self-timers on basis of the largest time needed */
1332: for (nst=0; nst<=nstMax; nst++) {
1333: sortSelfTimes[nst].id = nst;
1334: sortSelfTimes[nst].val = maxTimes[nst];
1335: }
1336: PetscFree(maxTimes);
1337: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1339: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1340: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1342: for (i=0; i<=nstMax; i++) {
1343: if ((sortSelfTimes[i].val/totalTime) >= (threshTime/100.0)) {
1344: NestedEventId nstEvent = sortSelfTimes[i].id;
1345: PetscEventPerfInfo selfPerfInfo;
1346: const char *name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1348: selfPerfInfo.time = selftimes[nstEvent].time ;
1349: selfPerfInfo.flops = selftimes[nstEvent].flops;
1350: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1351: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1352: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1353:
1354: PetscLogPrintNestedLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1355: if (wasPrinted){
1356: PetscViewerXMLEndSection(viewer, "event");
1357: }
1358: }
1359: }
1360: PetscViewerXMLEndSection(viewer, "selftimertable");
1361: PetscFree(sortSelfTimes);
1362: return(0);
1363: }
1367: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1368: {
1369: MPI_Comm comm;
1370: PetscErrorCode ierr;
1371: PetscLogDouble locTotalTime, globTotalTime;
1372: PetscNestedEventTree *tree = NULL;
1373: PetscSelfTimer *selftimers = NULL;
1374: int nTimers = 0, nstMax = 0;
1375: PetscViewerType vType;
1378: PetscViewerGetType(viewer,&vType);
1380: /* Set useXMLFormat that controls the format in all local PetscPrint.. functions */
1381: PetscViewerInitASCII_XML(viewer);
1383: PetscObjectGetComm((PetscObject)viewer,&comm);
1385: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1386: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1388: /* Get the total elapsed time, local and global maximum */
1389: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1390: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1392: /* Print global information about this run */
1393: PetscPrintExeSpecs(viewer);
1394: PetscPrintGlobalPerformance(viewer, locTotalTime);
1395:
1396: /* Collect nested timer tree info from all processes */
1397: PetscCreateLogTreeNested(viewer, &tree, &nTimers);
1398: PetscLogNestedPrintTop(viewer, tree, nTimers, globTotalTime);
1399: PetscLogFreeNestedTree(tree, nTimers);
1401: /* Calculate self-time for all (not-nested) events */
1402: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1403: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1404: PetscFree(selftimers);
1406: PetscViewerXMLEndSection(viewer, "petscroot");
1407: PetscViewerFinalASCII_XML(viewer);
1408: PetscLogNestedEnd();
1409: return(0);
1410: }
1412: #endif