Skip to content

Commit

Permalink
Fix Profiler + Minor improvements (#1548)
Browse files Browse the repository at this point in the history
There is currently a bug in the profiler, that the first run recorded
includes the runtime of all the warmup runs, making one ugly outlier.
This is because `times[0]` is set before the warmups and is never
properly reset.

This fixes that and makes the surrounding code a bit cleaner (e.g. using
`np.diff` instead of list comprehension).

I also added some options that make the profiler more usable when it's
called inside of other `tqdm` progress bars (allows setting `tqdm`'s
`leave=False` and stop it from printing results).

List of changes:
- Fix: timing error
- Add: `tqdm_leave` parameter
- Add: `print_results` parameter
- Cleanup: Only print `Profiling...` if `tqdm` is not used
- Cleanup: use `warnings.warn()` instead of `print("WARNING: ...")`
- Cleanup: remove unused variable for return value (didn't seem to be
used)
- Cleanup: make diff calculation more concise

---------

Co-authored-by: Tal Ben-Nun <tbennun@users.noreply.github.com>
  • Loading branch information
JanKleine and tbennun committed Mar 16, 2024
1 parent 5f9233e commit e542965
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 22 deletions.
13 changes: 11 additions & 2 deletions dace/builtin_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@


@contextmanager
def profile(repetitions: int = 100, warmup: int = 0):
def profile(
repetitions: int = 100,
warmup: int = 0,
tqdm_leave: bool = True,
print_results: bool = True,
):
"""
Context manager that enables profiling of each called DaCe program. If repetitions is greater than 1, the
program is run multiple times and the average execution time is reported.
Expand All @@ -35,6 +40,10 @@ def profile(repetitions: int = 100, warmup: int = 0):
:param repetitions: The number of times to run each DaCe program.
:param warmup: Number of additional repetitions to run the program without measuring time.
:param tqdm_leave: Sets the ``leave`` parameter of the ``tqdm`` progress bar (useful
for nested progress bars). Ignored if tqdm progress bar is not used.
:param print_results: Whether or not to print the median execution time after
all repetitions.
:note: Running functions multiple times may affect the results of the program.
"""
from dace.frontend.operations import CompiledSDFGProfiler # Avoid circular import
Expand All @@ -51,7 +60,7 @@ def profile(repetitions: int = 100, warmup: int = 0):
yield hook
return

profiler = CompiledSDFGProfiler(repetitions, warmup)
profiler = CompiledSDFGProfiler(repetitions, warmup, tqdm_leave, print_results)

with on_compiled_sdfg_call(context_manager=profiler):
yield profiler
Expand Down
62 changes: 42 additions & 20 deletions dace/frontend/operations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
from __future__ import print_function
from functools import partial
from itertools import chain, repeat

from contextlib import contextmanager
from timeit import default_timer as timer
Expand All @@ -10,6 +11,7 @@
import sympy
import os
import sys
import warnings

from dace import dtypes
from dace.config import Config
Expand All @@ -28,12 +30,20 @@ class CompiledSDFGProfiler:

times: List[Tuple['SDFG', List[float]]] #: The list of SDFGs and times for each SDFG called within the context.

def __init__(self, repetitions: int = 0, warmup: int = 0) -> None:
def __init__(
self,
repetitions: int = 0,
warmup: int = 0,
tqdm_leave: bool = True,
print_results: bool = True,
) -> None:
# Avoid import loop
from dace.codegen.instrumentation import report

self.repetitions = repetitions or int(Config.get('treps'))
self.warmup = warmup
self.tqdm_leave = tqdm_leave
self.print_results = print_results
if self.repetitions < 1:
raise ValueError('Number of repetitions must be at least 1')
if self.warmup < 0:
Expand All @@ -47,34 +57,45 @@ def __init__(self, repetitions: int = 0, warmup: int = 0) -> None:
def __call__(self, compiled_sdfg: 'CompiledSDFG', args: Tuple[Any, ...]):
from dace.codegen.instrumentation import report # Avoid import loop

start = timer()
# zeros to overwrite start time, followed by indices for each repetition
iterator = chain(repeat(0, self.warmup), range(1, self.repetitions + 1))

times = [start] * (self.repetitions + 1)
ret = None
print('\nProfiling...')

iterator = range(self.warmup + self.repetitions)
if Config.get_bool('profiling_status'):
try:
from tqdm import tqdm
iterator = tqdm(iterator, desc="Profiling", file=sys.stdout)

iterator = tqdm(
iterator,
desc='Profiling',
total=(self.warmup + self.repetitions),
file=sys.stdout,
leave=self.tqdm_leave,
)
except ImportError:
print('WARNING: Cannot show profiling progress, missing optional '
'dependency tqdm...\n\tTo see a live progress bar please install '
'tqdm (`pip install tqdm`)\n\tTo disable this feature (and '
'this warning) set `profiling_status` to false in the dace '
'config (~/.dace.conf).')
warnings.warn(
'Cannot show profiling progress, missing optional dependency '
'tqdm...\n\tTo see a live progress bar please install tqdm '
'(`pip install tqdm`)\n\tTo disable this feature (and this '
'warning) set `profiling_status` to false in the dace config '
'(~/.dace.conf).'
)
print('\nProfiling...')
else:
print('\nProfiling...')

offset = 1 - self.warmup
start_time = int(time.time())

times = np.ndarray(self.repetitions + 1, dtype=np.float64)
times[0] = timer()

for i in iterator:
# Call function
compiled_sdfg._cfunc(compiled_sdfg._libhandle, *args)
if i >= self.warmup:
times[i + offset] = timer()

diffs = np.array([(times[i] - times[i - 1])*1e3 for i in range(1, self.repetitions + 1)])
times[i] = timer()

# compute pairwise differences and convert to milliseconds
diffs = np.diff(times) * 1e3

# Add entries to the instrumentation report
self.report.name = self.report.name or start_time
Expand All @@ -88,8 +109,9 @@ def __call__(self, compiled_sdfg: 'CompiledSDFG', args: Tuple[Any, ...]):
self.report.durations[(0, -1, -1)][f'Python call to {compiled_sdfg.sdfg.name}'][-1].extend(diffs)

# Print profiling results
time_msecs = np.median(diffs)
print(compiled_sdfg.sdfg.name, time_msecs, 'ms')
if self.print_results:
time_msecs = np.median(diffs)
print(compiled_sdfg.sdfg.name, time_msecs, 'ms')

# Save every call separately
self.times.append((compiled_sdfg.sdfg, diffs))
Expand All @@ -105,7 +127,7 @@ def __call__(self, compiled_sdfg: 'CompiledSDFG', args: Tuple[Any, ...]):
# Restore state after skipping contents
compiled_sdfg.do_not_execute = old_dne

return ret
return None


def detect_reduction_type(wcr_str, openmp=False):
Expand Down

0 comments on commit e542965

Please sign in to comment.