Unverified commit 6d895da8 authored by guoshzhao, committed by GitHub

Benchmarks: Add Feature - Provide option to save raw data into file. (#333)

**Description**
Use the config `log_raw_data` to control whether or not to log the raw data into a file. The default value is `no`. It can be set to `yes` for particular benchmarks whose raw data should be saved to a file, such as the NCCL/RCCL tests.
parent d368d90e
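As a usage sketch (the benchmark name and platform below are illustrative, not part of this PR), the new flag can be passed through a benchmark's parameters string, since `--log_raw_data` is a plain boolean switch per the help text further down:

```python
# Hedged usage sketch: enable raw-data logging for a single benchmark run.
# 'kernel-launch' and Platform.CUDA are example choices, not mandated by this PR.
from superbench.benchmarks import BenchmarkRegistry, Platform

context = BenchmarkRegistry.create_benchmark_context(
    'kernel-launch', platform=Platform.CUDA, parameters='--log_raw_data'
)
benchmark = BenchmarkRegistry.launch_benchmark(context)
```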
@@ -127,7 +127,9 @@ def _process_raw_result(self, cmd_idx, raw_output):
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        self._result.add_raw_data(f'raw_output_{self._args.pytorch_models[cmd_idx]}', raw_output)
+        self._result.add_raw_data(
+            f'raw_output_{self._args.pytorch_models[cmd_idx]}', raw_output, self._args.log_raw_data
+        )
 
         success = False
         try:
...
@@ -400,8 +400,8 @@ def __process_model_result(self, model_action, precision, step_times):
         # The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
         millisecond_per_second = 1000
         throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
-        self._result.add_raw_data(metric_s, step_times)
-self._result.add_raw_data(metric_t, throughput)
+        self._result.add_raw_data(metric_s, step_times, self._args.log_raw_data)
+        self._result.add_raw_data(metric_t, throughput, self._args.log_raw_data)
 
         if model_action == ModelAction.TRAIN:
             if not self._sync_result(step_times):
...
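As a quick numeric check of the ms-to-samples/sec conversion in this hunk, with hypothetical values (5 ms and 4 ms step times, batch size 32):

```python
# Hypothetical values illustrating the throughput formula above.
millisecond_per_second = 1000
batch_size = 32
step_times = [5.0, 4.0]  # milliseconds per step
throughput = [millisecond_per_second / t * batch_size for t in step_times]
print(throughput)  # [6400.0, 8000.0] samples/sec
```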
@@ -3,6 +3,7 @@
 """A module for unified result of benchmarks."""
 
+import os
 import json
 from enum import Enum
@@ -46,7 +47,7 @@ def __eq__(self, rhs):
         """
         return self.__dict__ == rhs.__dict__
 
-    def add_raw_data(self, metric, value):
+    def add_raw_data(self, metric, value, log_raw_data):
         """Add raw benchmark data into result.
 
         Args:
@@ -54,6 +55,7 @@ def add_raw_data(self, metric, value):
             value (str or list): raw benchmark data.
               For e2e model benchmarks, its type is list.
              For micro-benchmarks or docker-benchmarks, its type is string.
+            log_raw_data (bool): whether to log raw data into file instead of saving it into result object.
 
         Return:
             True if succeed to add the raw data.
@@ -64,9 +66,14 @@ def add_raw_data(self, metric, value):
             )
             return False
 
-        if metric not in self.__raw_data:
-            self.__raw_data[metric] = list()
-        self.__raw_data[metric].append(value)
+        if log_raw_data:
+            with open(os.path.join(os.getcwd(), 'rawdata.log'), 'a') as f:
+                f.write('metric:{}\n'.format(metric))
+                f.write('rawdata:{}\n\n'.format(value))
+        else:
+            if metric not in self.__raw_data:
+                self.__raw_data[metric] = list()
+            self.__raw_data[metric].append(value)
 
         return True
...
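A minimal sketch of the two code paths above, mirroring the unit tests later in this diff; with `log_raw_data=True` the data is appended to `rawdata.log` in the current working directory instead of the result object:

```python
# Sketch: in-memory vs. file-backed raw data (names taken from this diff).
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult

result = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.SUCCESS)  # 'demo' is an arbitrary name
result.add_raw_data('metric1', 'kept in memory', False)  # appended to result.raw_data['metric1']
result.add_raw_data('metric1', 'written to disk', True)  # appended to ./rawdata.log instead
assert result.raw_data['metric1'] == ['kept in memory']
```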
@@ -200,6 +200,8 @@ def exec(self):
             benchmark_config = self._sb_benchmarks[benchmark_name]
             benchmark_results = list()
             self.__create_benchmark_dir(benchmark_name)
+            cwd = os.getcwd()
+            os.chdir(self.__get_benchmark_dir(benchmark_name))
 
             monitor = None
             if self.__get_rank_id() == 0 and self._sb_monitor_config and self._sb_monitor_config.enable:
@@ -243,3 +245,4 @@ def exec(self):
             if monitor:
                 monitor.stop()
             self.__write_benchmark_results(benchmark_name, benchmark_results)
+            os.chdir(cwd)
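The executor switches into the per-benchmark output directory before the run, so that `rawdata.log` lands next to the benchmark's other results, and restores the original directory afterwards. A defensive variant of the same pattern (a hypothetical helper, not in this diff) would use try/finally so the cwd is restored even when a benchmark raises:

```python
import os

def run_in_dir(benchmark_dir, run):
    """Run callable `run` inside `benchmark_dir`, always restoring the original cwd."""
    cwd = os.getcwd()
    os.chdir(benchmark_dir)
    try:
        return run()
    finally:
        os.chdir(cwd)  # restored even if run() raises
```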
@@ -33,7 +33,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
 
         pattern = r'\d+\.\d+'
         result = re.findall(pattern, raw_output)
         if len(result) != 2:
...
@@ -3,6 +3,8 @@
 """Tests for RocmOnnxRuntimeModelBenchmark modules."""
 
+from types import SimpleNamespace
+
 from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, Platform, ReturnCode
 from superbench.benchmarks.result import BenchmarkResult
@@ -20,6 +22,7 @@ def test_rocm_onnxruntime_performance():
     assert (benchmark._entrypoint == '/stage/onnxruntime-training-examples/huggingface/azureml/run_benchmark.sh')
     assert (benchmark._cmd is None)
     benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
+    benchmark._args = SimpleNamespace(log_raw_data=False)
 
     raw_output = """
 __superbench__ begin bert-large-uncased ngpu=1
...
@@ -54,7 +54,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
 
         try:
             params = raw_output.strip('\n').split('--')
...
@@ -53,7 +53,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
 
         try:
             params = raw_output.strip('\n').split(' memory=')
...
@@ -69,7 +69,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
 
         pattern = r'\d+\.\d+'
         result = re.findall(pattern, raw_output)
         if len(result) != 2:
...
@@ -121,7 +121,7 @@ def test_tensorrt_inference_result_parsing(self, test_raw_log):
         """Test tensorrt-inference benchmark result parsing."""
         (benchmark_cls, _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(self.benchmark_name, Platform.CUDA)
         benchmark = benchmark_cls(self.benchmark_name, parameters='')
-        benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'])
+        benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'], log_raw_data=False)
         benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
 
         # Positive case - valid raw output
...
@@ -158,6 +158,8 @@ def test_arguments_related_interfaces():
   --duration int        The elapsed time of benchmark in seconds.
   --force_fp32          Enable option to use full float32 precision.
   --hidden_size int     Hidden size.
+  --log_raw_data        Log raw data into file instead of saving it into
+                        result object.
   --model_action ModelAction [ModelAction ...]
                         Benchmark model process. E.g. train inference.
   --no_gpu              Disable GPU training.
...
@@ -192,6 +194,8 @@ def test_preprocess():
   --duration int        The elapsed time of benchmark in seconds.
   --force_fp32          Enable option to use full float32 precision.
   --hidden_size int     Hidden size.
+  --log_raw_data        Log raw data into file instead of saving it into
+                        result object.
   --model_action ModelAction [ModelAction ...]
                         Benchmark model process. E.g. train inference.
   --no_gpu              Disable GPU training.
...
@@ -49,7 +49,7 @@ def _benchmark(self):
             raw_data.append(str(result))
 
         metric = 'accumulation_result'
-        self._result.add_raw_data(metric, ','.join(raw_data))
+        self._result.add_raw_data(metric, ','.join(raw_data), self._args.log_raw_data)
         self._result.add_result(metric, result)
 
         return True
...
@@ -114,6 +114,8 @@ def test_get_benchmark_configurable_settings():
     expected = """optional arguments:
   --duration int      The elapsed time of benchmark in seconds.
+  --log_raw_data      Log raw data into file instead of saving it into result
+                      object.
   --lower_bound int   The lower bound for accumulation.
   --run_count int     The run count of benchmark.
   --upper_bound int   The upper bound for accumulation."""
...
@@ -3,6 +3,8 @@
 """Tests for BenchmarkResult module."""
 
+import os
+
 from superbench.benchmarks import BenchmarkType, ReturnCode, ReduceType
 from superbench.benchmarks.result import BenchmarkResult
@@ -10,22 +12,31 @@
 def test_add_raw_data():
     """Test interface BenchmarkResult.add_raw_data()."""
     result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
-    result.add_raw_data('metric1', 'raw log 1')
-    result.add_raw_data('metric1', 'raw log 2')
+    result.add_raw_data('metric1', 'raw log 1', False)
+    result.add_raw_data('metric1', 'raw log 2', False)
     assert (result.raw_data['metric1'][0] == 'raw log 1')
     assert (result.raw_data['metric1'][1] == 'raw log 2')
     assert (result.type == BenchmarkType.MICRO)
     assert (result.return_code == ReturnCode.SUCCESS)
 
     result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
-    result.add_raw_data('metric1', [1, 2, 3])
-    result.add_raw_data('metric1', [4, 5, 6])
+    result.add_raw_data('metric1', [1, 2, 3], False)
+    result.add_raw_data('metric1', [4, 5, 6], False)
     assert (result.raw_data['metric1'][0] == [1, 2, 3])
     assert (result.raw_data['metric1'][1] == [4, 5, 6])
     assert (result.type == BenchmarkType.MODEL)
     assert (result.return_code == ReturnCode.SUCCESS)
 
+    # Test log_raw_data = True.
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
+    result.add_raw_data('metric1', 'raw log 1', True)
+    result.add_raw_data('metric1', 'raw log 2', True)
+    assert (result.type == BenchmarkType.MICRO)
+    assert (result.return_code == ReturnCode.SUCCESS)
+    raw_data_file = os.path.join(os.getcwd(), 'rawdata.log')
+    assert (os.path.isfile(raw_data_file))
+    os.remove(raw_data_file)
+
 def test_add_result():
     """Test interface BenchmarkResult.add_result()."""
@@ -73,9 +84,9 @@ def test_serialize_deserialize():
     result.add_result('metric1', 300, ReduceType.MAX)
     result.add_result('metric1', 200, ReduceType.MAX)
     result.add_result('metric2', 100, ReduceType.AVG)
-    result.add_raw_data('metric1', [1, 2, 3])
-    result.add_raw_data('metric1', [4, 5, 6])
-    result.add_raw_data('metric1', [7, 8, 9])
+    result.add_raw_data('metric1', [1, 2, 3], False)
+    result.add_raw_data('metric1', [4, 5, 6], False)
+    result.add_raw_data('metric1', [7, 8, 9], False)
 
     start_time = '2021-02-03 16:59:49'
     end_time = '2021-02-03 17:00:08'
     result.set_timestamp(start_time, end_time)
...
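Each entry in `rawdata.log` is a `metric:` line followed by a `rawdata:` line and a blank separator, per the `add_raw_data` hunk above. A hypothetical reader for that format (not part of the PR; it assumes single-line raw data values, whereas real raw output can span multiple lines):

```python
# Hypothetical helper: read back entries written by add_raw_data(..., log_raw_data=True).
# Assumes each rawdata value fits on one line.
def parse_rawdata_log(path='rawdata.log'):
    entries = []
    with open(path) as f:
        for block in f.read().strip().split('\n\n'):
            metric_line, rawdata_line = block.split('\n', 1)
            entries.append((
                metric_line[len('metric:'):],
                rawdata_line[len('rawdata:'):],
            ))
    return entries
```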