Skip to content
This repository was archived by the owner on Feb 3, 2025. It is now read-only.

Commit bfed396

Browse files
author
DEKHTIARJonathan
committed
[Benchmarking-Py] 2.0.3 Release - Adding Profiling for convert & build
1 parent b7e663d commit bfed396

File tree

8 files changed

+287
-153
lines changed

8 files changed

+287
-153
lines changed

tftrt/benchmarking-python/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ Description of the change
4646

4747
<!-- YOU CAN EDIT FROM HERE -->
4848

49+
## [2.0.3] - 2022.10.20 - @DEKHTIARJonathan
50+
51+
- Profiling for TF-TRT build and convert added
52+
- Argument `tf_profile_export_path` renamed: `tftrt_build_profile_export_path`
53+
- NVIDIA PyCOCOTools Updated to version 0.7.1
54+
4955
## [2.0.2] - 2022.09.30 - @DEKHTIARJonathan
5056

5157
- Bug Fixed in real data GPU Prefetcher that could cause a crash when the dataset

tftrt/benchmarking-python/benchmark_args.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,23 @@ def __init__(self):
308308
# =========================== TF Profiling =========================== #
309309

310310
self._parser.add_argument(
311-
"--tf_profile_export_path",
311+
"--tftrt_build_profile_export_path",
312+
type=str,
313+
default=None,
314+
help="If set, the script will export tf.profile files for further "
315+
"performance analysis."
316+
)
317+
318+
self._parser.add_argument(
319+
"--tftrt_convert_profile_export_path",
320+
type=str,
321+
default=None,
322+
help="If set, the script will export tf.profile files for further "
323+
"performance analysis."
324+
)
325+
326+
self._parser.add_argument(
327+
"--inference_loop_profile_export_path",
312328
type=str,
313329
default=None,
314330
help="If set, the script will export tf.profile files for further "
@@ -451,6 +467,18 @@ def _post_process_args(self, args):
451467
if args.debug or args.debug_data_aggregation or args.debug_performance:
452468
logging.set_verbosity(logging.DEBUG)
453469

470+
if (args.inference_loop_profile_export_path or
471+
args.tftrt_build_profile_export_path or
472+
args.tftrt_convert_profile_export_path):
473+
"""Warm-up the profiler session.
474+
The profiler session will set up profiling context, including loading CUPTI
475+
library for GPU profiling. This is used for improving the accuracy of
476+
the profiling results.
477+
"""
478+
from tensorflow.python.profiler.profiler_v2 import warmup
479+
logging.info("[PROFILER] Warming Up ...")
480+
warmup()
481+
454482
return args
455483

456484
def parse_args(self):

tftrt/benchmarking-python/benchmark_info.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# The `__version__` number shall be updated everytime core benchmarking files
1111
# are updated.
1212
# Please update CHANGELOG.md with a description of what this version changed.
13-
__version__ = "2.0.2"
13+
__version__ = "2.0.3"
1414

1515

1616
def get_commit_id():
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python
2+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# -*- coding: utf-8 -*-
4+
5+
import functools
6+
import contextlib
7+
8+
import tensorflow as tf
9+
10+
from benchmark_logger import logging
11+
from benchmark_utils import timed_section
12+
13+
14+
class ProfilingCTX(object):
    """Context manager wrapping ``tf.profiler.experimental`` start/stop.

    Profiling is a no-op when ``export_dir`` is None, so callers can pass
    the (possibly unset) CLI argument straight through.
    """

    def __init__(self, export_dir=None, verbose=False, delay_ms=0):
        # Tracks whether THIS instance successfully started the profiler,
        # so `stop()` is only attempted after a matching `start()`.
        self._started = False
        self._export_dir = export_dir
        self._verbose = verbose
        self._delay_ms = delay_ms

    def start(self):
        """Start a TF profiler session exporting to ``self._export_dir``.

        Does nothing if already started or if no export directory was given.
        """
        if not self._started and self._export_dir is not None:
            try:
                profiler_opts = tf.profiler.experimental.ProfilerOptions(
                    # Adjust TraceMe levels:
                    # - 1: critical
                    # - 2: info [default]
                    # - 3: verbose
                    host_tracer_level=3 if self._verbose else 2,
                    # Enables python function call tracing
                    # - 0: disabled [default]
                    # - 1: enabled
                    python_tracer_level=1 if self._verbose else 0,
                    # Adjust device (TPU/GPU) tracer level:
                    # - 0: disabled
                    # - 1: enabled [default]
                    device_tracer_level=1,
                    delay_ms=self._delay_ms
                )
                tf.profiler.experimental.start(
                    logdir=self._export_dir, options=profiler_opts
                )
                logging.info(
                    "[PROFILER] Starting Profiling - Data will be stored in: "
                    f"`{self._export_dir}`"
                )
                self._started = True

            # Only one profiler session may exist at a time; a second
            # `start()` (e.g. from another ProfilingCTX) raises this.
            except tf.errors.AlreadyExistsError:
                logging.warning(
                    "[PROFILER] Could not start the profiler. It "
                    "appears to have already been started."
                )

    def stop(self):
        """Stop the profiler session if this instance started one."""
        if self._started:
            try:
                tf.profiler.experimental.stop()
                logging.info(
                    "[PROFILER] Stopping Profiling - Data has been stored in: "
                    f"`{self._export_dir}`"
                )
            # Raised when the profiler has already been stopped or was
            # never started.
            except tf.errors.UnavailableError:
                logging.warning(
                    "[PROFILER] Could not stop the profiler. It "
                    "appears to have already been stopped."
                )
            self._started = False

    def __enter__(self):
        self.start()
        # BUGFIX: return self so `with ProfilingCTX(...) as ctx:` binds the
        # instance instead of None (the original returned nothing).
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()
78+
79+
80+
@contextlib.contextmanager
def time_and_trace_ctx(message, step_num=None, _r=None):
    """Wrap a section in both wall-clock timing and a TF profiler trace.

    Combines `timed_section(message)` (duration logging) with a
    `tf.profiler.experimental.Trace` event of the same name, so the
    section shows up in the logs and in the exported profile.
    """
    with timed_section(message), tf.profiler.experimental.Trace(
            message, step_num=step_num, _r=_r):
        yield

0 commit comments

Comments
 (0)