"examples/bench.py" did not exist on "7e59976babdf5bf1d8c77b7468b2bcbf23d53614"
Unverified commit 7184bdd1, authored by Yifan Xiong, committed by GitHub
Browse files

Benchmarks - Update result parsing in tensorrt inference (#541)

* Update result parsing for newer tensorrt versions
* Update arguments when load torchvision models
parent f2599137
......@@ -166,6 +166,7 @@ def run(self):
'numpy>=1.19.2',
'omegaconf==2.0.6',
'openpyxl>=3.0.7',
'packaging>=21.0',
'pandas>=1.1.5',
'pssh @ git+https://github.com/lilydjwg/pssh.git@v2.3.4',
'pyyaml>=5.3',
......
......@@ -5,6 +5,7 @@
from pathlib import Path
from packaging import version
import torch.hub
import torch.onnx
import torchvision.models
......@@ -129,7 +130,9 @@ def export_torchvision_model(self, model_name, batch_size=1):
if not self.check_torchvision_model(model_name):
return ''
file_name = str(self._onnx_model_path / (model_name + '.onnx'))
model = getattr(torchvision.models, model_name)(pretrained=False).eval().cuda()
# the parameter 'pretrained' is deprecated since 0.13 in torchvision
args = {'pretrained': False} if version.parse(torchvision.__version__) < version.parse('0.13') else {}
model = getattr(torchvision.models, model_name)(**args).eval().cuda()
dummy_input = torch.randn((batch_size, 3, 224, 224), device='cuda')
torch.onnx.export(
model,
......
......@@ -145,6 +145,17 @@ def _process_raw_result(self, cmd_idx, raw_output):
self._result.add_result(f'{model}_host_time_{tag}', float(lats[0]))
self._result.add_result(f'{model}_end_to_end_time_{tag}', float(lats[1]))
success = True
if '[I] Latency:' in line or '[I] GPU Compute Time:' in line:
tm = 'gpu' if '[I] GPU Compute Time:' in line else 'host'
self._result.add_result(
f'{model}_{tm}_time_mean',
float(re.findall(r'mean = (\d+\.\d+) ms', line)[0]),
)
self._result.add_result(
f'{model}_{tm}_time_99',
float(re.findall(r'\(99\%\) = (\d+\.\d+) ms', line)[0]),
)
success = True
except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
logger.error(
......
......@@ -116,16 +116,17 @@ def test_tensorrt_inference_params(self):
len(test_case.get('pytorch_models', benchmark._pytorch_models)), len(benchmark._commands)
)
@decorator.load_data('tests/data/tensorrt_inference.log')
def test_tensorrt_inference_result_parsing(self, test_raw_log):
@decorator.load_data('tests/data/tensorrt_inference.1.log')
@decorator.load_data('tests/data/tensorrt_inference.2.log')
def test_tensorrt_inference_result_parsing(self, test_raw_log_1, test_raw_log_2):
"""Test tensorrt-inference benchmark result parsing."""
(benchmark_cls, _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(self.benchmark_name, Platform.CUDA)
benchmark = benchmark_cls(self.benchmark_name, parameters='')
benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'], log_raw_data=False)
benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
# Positive case - valid raw output
self.assertTrue(benchmark._process_raw_result(0, test_raw_log))
# Positive case 1 - valid raw output
self.assertTrue(benchmark._process_raw_result(0, test_raw_log_1))
self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)
self.assertEqual(6 + benchmark.default_metric_count, len(benchmark.result))
......@@ -134,5 +135,12 @@ def test_tensorrt_inference_result_parsing(self, test_raw_log):
self.assertEqual(0.6, benchmark.result[f'model_0_host_time_{tag}'][0])
self.assertEqual(1.0, benchmark.result[f'model_0_end_to_end_time_{tag}'][0])
# Positive case 2 - valid raw output
self.assertTrue(benchmark._process_raw_result(0, test_raw_log_2))
self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)
for tag in ['mean', '99']:
self.assertEqual(1.5, benchmark.result[f'model_0_gpu_time_{tag}'][1])
self.assertEqual(2.0, benchmark.result[f'model_0_host_time_{tag}'][1])
# Negative case - invalid raw output
self.assertFalse(benchmark._process_raw_result(1, 'Invalid raw output'))
[06/29/2023-08:24:55] [I] === Model Options ===
[06/29/2023-08:24:55] [I] Format: ONNX
[06/29/2023-08:24:55] [I] Model: /root/.cache/torch/hub/onnx/resnet50.onnx
[06/29/2023-08:24:55] [I] Output:
[06/29/2023-08:24:55] [I] === Build Options ===
[06/29/2023-08:24:55] [I] Max batch: explicit batch
[06/29/2023-08:24:55] [I] Memory Pools: workspace: 8192 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default
[06/29/2023-08:24:55] [I] minTiming: 1
[06/29/2023-08:24:55] [I] avgTiming: 8
[06/29/2023-08:24:55] [I] Precision: FP32+FP16
[06/29/2023-08:24:55] [I] LayerPrecisions:
[06/29/2023-08:24:55] [I] Calibration:
[06/29/2023-08:24:55] [I] Refit: Disabled
[06/29/2023-08:24:55] [I] Sparsity: Disabled
[06/29/2023-08:24:55] [I] Safe mode: Disabled
[06/29/2023-08:24:55] [I] DirectIO mode: Disabled
[06/29/2023-08:24:55] [I] Restricted mode: Disabled
[06/29/2023-08:24:55] [I] Build only: Disabled
[06/29/2023-08:24:55] [I] Save engine:
[06/29/2023-08:24:55] [I] Load engine:
[06/29/2023-08:24:55] [I] Profiling verbosity: 0
[06/29/2023-08:24:55] [I] Tactic sources: Using default tactic sources
[06/29/2023-08:24:55] [I] timingCacheMode: local
[06/29/2023-08:24:55] [I] timingCacheFile:
[06/29/2023-08:24:55] [I] Heuristic: Disabled
[06/29/2023-08:24:55] [I] Preview Features: Use default preview flags.
[06/29/2023-08:24:55] [I] Input(s)s format: fp32:CHW
[06/29/2023-08:24:55] [I] Output(s)s format: fp32:CHW
[06/29/2023-08:24:55] [I] Input build shape: input=32x3x224x224+32x3x224x224+32x3x224x224
[06/29/2023-08:24:55] [I] Input calibration shapes: model
[06/29/2023-08:24:55] [I] === System Options ===
[06/29/2023-08:24:55] [I] Device: 0
[06/29/2023-08:24:55] [I] DLACore:
[06/29/2023-08:24:55] [I] Plugins:
[06/29/2023-08:24:55] [I] === Inference Options ===
[06/29/2023-08:24:55] [I] Batch: Explicit
[06/29/2023-08:24:55] [I] Input inference shape: input=32x3x224x224
[06/29/2023-08:24:55] [I] Iterations: 2048
[06/29/2023-08:24:55] [I] Duration: 3s (+ 200ms warm up)
[06/29/2023-08:24:55] [I] Sleep time: 0ms
[06/29/2023-08:24:55] [I] Idle time: 0ms
[06/29/2023-08:24:55] [I] Streams: 1
[06/29/2023-08:24:55] [I] ExposeDMA: Disabled
[06/29/2023-08:24:55] [I] Data transfers: Enabled
[06/29/2023-08:24:55] [I] Spin-wait: Disabled
[06/29/2023-08:24:55] [I] Multithreading: Disabled
[06/29/2023-08:24:55] [I] CUDA Graph: Disabled
[06/29/2023-08:24:55] [I] Separate profiling: Disabled
[06/29/2023-08:24:55] [I] Time Deserialize: Disabled
[06/29/2023-08:24:55] [I] Time Refit: Disabled
[06/29/2023-08:24:55] [I] NVTX verbosity: 0
[06/29/2023-08:24:55] [I] Persistent Cache Ratio: 0
[06/29/2023-08:24:55] [I] Inputs:
[06/29/2023-08:24:55] [I] === Reporting Options ===
[06/29/2023-08:24:55] [I] Verbose: Disabled
[06/29/2023-08:24:55] [I] Averages: 10 inferences
[06/29/2023-08:24:55] [I] Percentiles: 99
[06/29/2023-08:24:55] [I] Dump refittable layers:Disabled
[06/29/2023-08:24:55] [I] Dump output: Disabled
[06/29/2023-08:24:55] [I] Profile: Disabled
[06/29/2023-08:24:55] [I] Export timing to JSON file:
[06/29/2023-08:24:55] [I] Export output to JSON file:
[06/29/2023-08:24:55] [I] Export profile to JSON file:
[06/29/2023-08:25:38] [I]
[06/29/2023-08:25:38] [I] === Trace details ===
[06/29/2023-08:25:38] [I] Trace averages of 10 runs:
[06/29/2023-08:25:38] [I] Average on 10 runs - GPU latency: 1.5 ms - Host latency: 2.0 ms (enqueue 0.4 ms)
[06/29/2023-08:25:38] [I] Average on 10 runs - GPU latency: 1.5 ms - Host latency: 2.0 ms (enqueue 0.4 ms)
[06/29/2023-08:25:38] [I]
[06/29/2023-08:25:38] [I] === Performance summary ===
[06/29/2023-08:25:38] [I] Throughput: 1000.00 qps
[06/29/2023-08:25:38] [I] Latency: min = 1.9 ms, max = 2.1 ms, mean = 2.0 ms, median = 2.0 ms, percentile(99%) = 2.0 ms
[06/29/2023-08:25:38] [I] Enqueue Time: min = 0.3 ms, max = 0.3 ms, mean = 0.3 ms, median = 0.3 ms, percentile(99%) = 0.3 ms
[06/29/2023-08:25:38] [I] H2D Latency: min = 0.3 ms, max = 0.3 ms, mean = 0.3 ms, median = 0.3 ms, percentile(99%) = 0.3 ms
[06/29/2023-08:25:38] [I] GPU Compute Time: min = 1.4 ms, max = 1.6 ms, mean = 1.5 ms, median = 1.5 ms, percentile(99%) = 1.5 ms
[06/29/2023-08:25:38] [I] D2H Latency: min = 0.03 ms, max = 0.03 ms, mean = 0.03 ms, median = 0.03 ms, percentile(99%) = 0.03 ms
[06/29/2023-08:25:38] [I] Total Host Walltime: 3.0 s
[06/29/2023-08:25:38] [I] Total GPU Compute Time: 2.9 s
[06/29/2023-08:25:38] [I] Explanations of the performance metrics are printed in the verbose logs.
[06/29/2023-08:25:38] [I]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment