Unverified Commit 6d895da8 authored by guoshzhao, committed by GitHub

Benchmarks: Add Feature - Provide option to save raw data into file. (#333)

**Description**
Use the config option `log_raw_data` to control whether the raw data is logged into a file. The default value is `no`. It can be set to `yes` for particular benchmarks, such as the NCCL/RCCL tests, to save their raw data into a file.
parent d368d90e
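For context, here is a minimal sketch of what the new third argument to `add_raw_data` does, based on the changes in this diff (the benchmark name, metric names, and values are illustrative):

```python
import os

from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult

result = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.SUCCESS)

# log_raw_data=False (the default): raw data stays in the result object.
result.add_raw_data('raw_output_0', 'small benchmark output', False)
assert result.raw_data['raw_output_0'] == ['small benchmark output']

# log_raw_data=True: raw data is appended to rawdata.log in the current
# working directory instead of being kept in memory.
result.add_raw_data('raw_output_1', 'very large benchmark output', True)
assert os.path.isfile(os.path.join(os.getcwd(), 'rawdata.log'))
```

This is mainly useful for benchmarks whose raw output is too large to keep in the result object, such as the NCCL/RCCL tests mentioned above.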
......@@ -127,7 +127,9 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data(f'raw_output_{self._args.pytorch_models[cmd_idx]}', raw_output)
self._result.add_raw_data(
f'raw_output_{self._args.pytorch_models[cmd_idx]}', raw_output, self._args.log_raw_data
)
success = False
try:
......
......@@ -400,8 +400,8 @@ def __process_model_result(self, model_action, precision, step_times):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
self._result.add_raw_data(metric_s, step_times)
self._result.add_raw_data(metric_t, throughput)
self._result.add_raw_data(metric_s, step_times, self._args.log_raw_data)
self._result.add_raw_data(metric_t, throughput, self._args.log_raw_data)
if model_action == ModelAction.TRAIN:
if not self._sync_result(step_times):
......
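As a quick sanity check of the throughput formula in the hunk above (the step times and batch size here are made up):

```python
# Step times are in milliseconds, so throughput in samples/sec is
# 1000 / step_time * batch_size.
batch_size = 32
step_times = [5.0, 4.0]  # illustrative values, in ms
throughput = [1000 / t * batch_size for t in step_times]
print(throughput)  # [6400.0, 8000.0]
```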
......@@ -3,6 +3,7 @@
"""A module for unified result of benchmarks."""
import os
import json
from enum import Enum
......@@ -46,7 +47,7 @@ def __eq__(self, rhs):
"""
return self.__dict__ == rhs.__dict__
def add_raw_data(self, metric, value):
def add_raw_data(self, metric, value, log_raw_data):
"""Add raw benchmark data into result.
Args:
......@@ -54,6 +55,7 @@ def add_raw_data(self, metric, value):
value (str or list): raw benchmark data.
For e2e model benchmarks, its type is list.
For micro-benchmarks or docker-benchmarks, its type is string.
log_raw_data (bool): whether to log raw data into file instead of saving it into result object.
Return:
True if succeed to add the raw data.
......@@ -64,6 +66,11 @@ def add_raw_data(self, metric, value):
)
return False
if log_raw_data:
with open(os.path.join(os.getcwd(), 'rawdata.log'), 'a') as f:
f.write('metric:{}\n'.format(metric))
f.write('rawdata:{}\n\n'.format(value))
else:
if metric not in self.__raw_data:
self.__raw_data[metric] = list()
self.__raw_data[metric].append(value)
......
......@@ -200,6 +200,8 @@ def exec(self):
benchmark_config = self._sb_benchmarks[benchmark_name]
benchmark_results = list()
self.__create_benchmark_dir(benchmark_name)
cwd = os.getcwd()
os.chdir(self.__get_benchmark_dir(benchmark_name))
monitor = None
if self.__get_rank_id() == 0 and self._sb_monitor_config and self._sb_monitor_config.enable:
......@@ -243,3 +245,4 @@ def exec(self):
if monitor:
monitor.stop()
self.__write_benchmark_results(benchmark_name, benchmark_results)
os.chdir(cwd)
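The executor change above switches into the per-benchmark output directory before running and restores the original directory afterwards, so a benchmark that logs raw data ends up with `rawdata.log` next to its other results. A sketch of the same save/restore idea wrapped in a context manager (the output path below is an assumption, not the executor's real layout):

```python
import os
from contextlib import contextmanager


@contextmanager
def pushd(path):
    """Temporarily change the working directory and restore it on exit."""
    cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(cwd)


# Hypothetical per-benchmark output directory; in the executor the real path
# comes from self.__get_benchmark_dir(benchmark_name).
out_dir = 'outputs/benchmarks/demo'
os.makedirs(out_dir, exist_ok=True)
with pushd(out_dir):
    # Anything appending to 'rawdata.log' now lands in the benchmark directory.
    with open('rawdata.log', 'a') as f:
        f.write('metric:demo\nrawdata:1,2,3\n\n')
```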
......@@ -33,7 +33,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
pattern = r'\d+\.\d+'
result = re.findall(pattern, raw_output)
if len(result) != 2:
......
......@@ -3,6 +3,8 @@
"""Tests for RocmOnnxRuntimeModelBenchmark modules."""
from types import SimpleNamespace
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, Platform, ReturnCode
from superbench.benchmarks.result import BenchmarkResult
......@@ -20,6 +22,7 @@ def test_rocm_onnxruntime_performance():
assert (benchmark._entrypoint == '/stage/onnxruntime-training-examples/huggingface/azureml/run_benchmark.sh')
assert (benchmark._cmd is None)
benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
benchmark._args = SimpleNamespace(log_raw_data=False)
raw_output = """
__superbench__ begin bert-large-uncased ngpu=1
......
......@@ -54,7 +54,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
params = raw_output.strip('\n').split('--')
......
......@@ -53,7 +53,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try:
params = raw_output.strip('\n').split(' memory=')
......
......@@ -69,7 +69,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
pattern = r'\d+\.\d+'
result = re.findall(pattern, raw_output)
if len(result) != 2:
......
......@@ -121,7 +121,7 @@ def test_tensorrt_inference_result_parsing(self, test_raw_log):
"""Test tensorrt-inference benchmark result parsing."""
(benchmark_cls, _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(self.benchmark_name, Platform.CUDA)
benchmark = benchmark_cls(self.benchmark_name, parameters='')
benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'])
benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'], log_raw_data=False)
benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
# Positive case - valid raw output
......
......@@ -158,6 +158,8 @@ def test_arguments_related_interfaces():
--duration int The elapsed time of benchmark in seconds.
--force_fp32 Enable option to use full float32 precision.
--hidden_size int Hidden size.
--log_raw_data Log raw data into file instead of saving it into
result object.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--no_gpu Disable GPU training.
......@@ -192,6 +194,8 @@ def test_preprocess():
--duration int The elapsed time of benchmark in seconds.
--force_fp32 Enable option to use full float32 precision.
--hidden_size int Hidden size.
--log_raw_data Log raw data into file instead of saving it into
result object.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--no_gpu Disable GPU training.
......
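The two expected help-text blocks above come from a new boolean argument. A hedged sketch of how such a flag is typically declared with argparse (this is not necessarily the exact code in the commit):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--log_raw_data',
    action='store_true',
    default=False,
    help='Log raw data into file instead of saving it into result object.',
)

args = parser.parse_args(['--log_raw_data'])
assert args.log_raw_data is True
args = parser.parse_args([])
assert args.log_raw_data is False
```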
......@@ -49,7 +49,7 @@ def _benchmark(self):
raw_data.append(str(result))
metric = 'accumulation_result'
self._result.add_raw_data(metric, ','.join(raw_data))
self._result.add_raw_data(metric, ','.join(raw_data), self._args.log_raw_data)
self._result.add_result(metric, result)
return True
......@@ -114,6 +114,8 @@ def test_get_benchmark_configurable_settings():
expected = """optional arguments:
--duration int The elapsed time of benchmark in seconds.
--log_raw_data Log raw data into file instead of saving it into result
object.
--lower_bound int The lower bound for accumulation.
--run_count int The run count of benchmark.
--upper_bound int The upper bound for accumulation."""
......
......@@ -3,6 +3,8 @@
"""Tests for BenchmarkResult module."""
import os
from superbench.benchmarks import BenchmarkType, ReturnCode, ReduceType
from superbench.benchmarks.result import BenchmarkResult
......@@ -10,22 +12,31 @@
def test_add_raw_data():
"""Test interface BenchmarkResult.add_raw_data()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
result.add_raw_data('metric1', 'raw log 1')
result.add_raw_data('metric1', 'raw log 2')
result.add_raw_data('metric1', 'raw log 1', False)
result.add_raw_data('metric1', 'raw log 2', False)
assert (result.raw_data['metric1'][0] == 'raw log 1')
assert (result.raw_data['metric1'][1] == 'raw log 2')
assert (result.type == BenchmarkType.MICRO)
assert (result.return_code == ReturnCode.SUCCESS)
result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
result.add_raw_data('metric1', [1, 2, 3], False)
result.add_raw_data('metric1', [4, 5, 6], False)
assert (result.raw_data['metric1'][0] == [1, 2, 3])
assert (result.raw_data['metric1'][1] == [4, 5, 6])
assert (result.type == BenchmarkType.MODEL)
assert (result.return_code == ReturnCode.SUCCESS)
# Test log_raw_data = True.
result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
result.add_raw_data('metric1', 'raw log 1', True)
result.add_raw_data('metric1', 'raw log 2', True)
assert (result.type == BenchmarkType.MICRO)
assert (result.return_code == ReturnCode.SUCCESS)
raw_data_file = os.path.join(os.getcwd(), 'rawdata.log')
assert (os.path.isfile(raw_data_file))
os.remove(raw_data_file)
def test_add_result():
"""Test interface BenchmarkResult.add_result()."""
......@@ -73,9 +84,9 @@ def test_serialize_deserialize():
result.add_result('metric1', 300, ReduceType.MAX)
result.add_result('metric1', 200, ReduceType.MAX)
result.add_result('metric2', 100, ReduceType.AVG)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
result.add_raw_data('metric1', [7, 8, 9])
result.add_raw_data('metric1', [1, 2, 3], False)
result.add_raw_data('metric1', [4, 5, 6], False)
result.add_raw_data('metric1', [7, 8, 9], False)
start_time = '2021-02-03 16:59:49'
end_time = '2021-02-03 17:00:08'
result.set_timestamp(start_time, end_time)
......