Unverified commit 44f0270e authored by guoshzhao, committed by GitHub

Benchmarks: Add Feature - Add return_code metric into result (#256)

**Description**

Add a `return_code` metric into the benchmark result and revise the unit tests accordingly.
parent 655f238d
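In short, `BenchmarkResult` now seeds its summarized result with a default `return_code` metric at construction time, keeps it in sync in `set_return_code()`, and exposes `default_metric_count` so tests can account for the extra entry. A minimal standalone sketch of the new behavior, assuming simplified stand-ins for the superbench classes and enum values (this is not the library source):

```python
from enum import Enum


class ReturnCode(Enum):
    """Simplified stand-in for superbench.benchmarks.ReturnCode."""
    SUCCESS = 0
    INVALID_ARGUMENT = 1


class BenchmarkResult:
    """Sketch of only the changed parts of BenchmarkResult."""
    def __init__(self, name, type, return_code, run_count=0):
        self.__return_code = return_code
        self.__result = dict()
        # New: seed the summarized result with the default 'return_code' metric.
        self.__result['return_code'] = [return_code.value]

    def set_return_code(self, return_code):
        self.__return_code = return_code
        # New: keep the default metric in sync with the return code.
        self.__result['return_code'][0] = return_code.value

    @property
    def result(self):
        return self.__result

    @property
    def default_metric_count(self):
        # Currently 'return_code' is the only default metric.
        return 1 if 'return_code' in self.__result else 0


r = BenchmarkResult('demo', 'micro', ReturnCode.SUCCESS)
assert r.result['return_code'] == [0]
r.set_return_code(ReturnCode.INVALID_ARGUMENT)
assert r.result['return_code'] == [1]
assert r.default_metric_count == 1
```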
```diff
@@ -283,3 +283,8 @@ def result(self):
     def serialized_result(self):
         """Decoration function to access benchmark result."""
         return self._result.to_string()
+
+    @property
+    def default_metric_count(self):
+        """Decoration function to get the count of default metrics."""
+        return self._result.default_metric_count
```
```diff
@@ -31,6 +31,7 @@ def __init__(self, name, type, return_code, run_count=0):
         self.__end_time = None
         self.__raw_data = dict()
         self.__result = dict()
+        self.__result['return_code'] = [return_code.value]
         self.__reduce_op = dict()

     def __eq__(self, rhs):
@@ -119,6 +120,7 @@ def set_return_code(self, return_code):
             return_code (ReturnCode): return code defined in superbench.benchmarks.ReturnCode.
         """
         self.__return_code = return_code
+        self.__result['return_code'][0] = return_code.value

     def to_string(self):
         """Serialize the BenchmarkResult object to string.
```
```diff
@@ -158,6 +160,15 @@ def return_code(self):
         """Decoration function to access __return_code."""
         return self.__return_code

+    @property
+    def default_metric_count(self):
+        """Decoration function to get the count of default metrics."""
+        count = 0
+        if 'return_code' in self.__result:
+            count += 1
+        return count
+
     @property
     def start_time(self):
         """Decoration function to access __start_time."""
```
...
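The test updates below all follow one pattern: assertions that count entries in `benchmark.result` now add `benchmark.default_metric_count`, and loops over result metrics skip `return_code` when checking raw data, since the default metric has no raw-data counterpart. A hedged illustration of that pattern, using a hypothetical `FakeBenchmark` stand-in rather than a real superbench benchmark:

```python
class FakeBenchmark:
    """Stand-in for a finished benchmark object, not a superbench class."""
    result = {'return_code': [0], 'bandwidth': [10.0]}
    raw_data = {'bandwidth': [[10.0, 10.1, 9.9]]}
    default_metric_count = 1


benchmark = FakeBenchmark()
parsed_metrics = 1  # only 'bandwidth' comes from parsed raw output
# Adjustment 1: result-length checks add default_metric_count.
assert len(benchmark.result) == parsed_metrics + benchmark.default_metric_count
# Adjustment 2: raw-data checks skip 'return_code', which has no raw data.
for metric in benchmark.result:
    if metric != 'return_code':
        assert metric in benchmark.raw_data
```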
```diff
@@ -45,8 +45,8 @@ def test_pytorch_computation_communication_overlap_normal():
     assert (benchmark.run_count == 1)
     assert (benchmark.return_code == ReturnCode.SUCCESS)
-    assert (len(benchmark.raw_data) == benchmark.run_count * len(benchmark._args.kernel))
-    assert (len(benchmark.result) == benchmark.run_count * len(benchmark._args.kernel))
+    assert (len(benchmark.raw_data) == len(benchmark._args.kernel))
+    assert (len(benchmark.result) == len(benchmark._args.kernel) + benchmark.default_metric_count)


 @decorator.cuda_test
@@ -79,6 +79,6 @@ def test_pytorch_computation_communication_overlap_fake_distributed():
     assert (benchmark.run_count == 1)
     assert (benchmark.return_code == ReturnCode.SUCCESS)
-    assert (len(benchmark.raw_data) == benchmark.run_count * len(benchmark._args.kernel))
-    assert (len(benchmark.result) == benchmark.run_count * len(benchmark._args.kernel))
+    assert (len(benchmark.raw_data) == len(benchmark._args.kernel))
+    assert (len(benchmark.result) == len(benchmark._args.kernel) + benchmark.default_metric_count)

     utils.clean_simulated_ddp_distributed_env()
```
```diff
@@ -42,6 +42,7 @@ def test_cublas_functions():
     for metric in list(benchmark.result.keys()):
         assert (len(benchmark.result[metric]) == 1)
         assert (isinstance(benchmark.result[metric][0], numbers.Number))
-        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
+        if metric != 'return_code':
+            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

     # Test for custom configuration
@@ -73,8 +74,9 @@ def test_cublas_functions():
     assert (len(benchmark.raw_data['raw_output_0']) == 1)
     assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
-    assert (1 == len(benchmark.result))
+    assert (1 + benchmark.default_metric_count == len(benchmark.result))
     for metric in list(benchmark.result.keys()):
         assert (len(benchmark.result[metric]) == 1)
         assert (isinstance(benchmark.result[metric][0], numbers.Number))
-        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
+        if metric != 'return_code':
+            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
```
```diff
@@ -42,6 +42,7 @@ def test_cudnn_functions():
     for metric in list(benchmark.result.keys()):
         assert (len(benchmark.result[metric]) == 1)
         assert (isinstance(benchmark.result[metric][0], numbers.Number))
-        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
+        if metric != 'return_code':
+            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

     # Test for custom configuration
@@ -77,8 +78,9 @@ def test_cudnn_functions():
     assert (len(benchmark.raw_data['raw_output_0']) == 1)
     assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
-    assert (1 == len(benchmark.result))
+    assert (1 + benchmark.default_metric_count == len(benchmark.result))
     for metric in list(benchmark.result.keys()):
         assert (len(benchmark.result[metric]) == 1)
         assert (isinstance(benchmark.result[metric][0], numbers.Number))
-        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
+        if metric != 'return_code':
+            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
```
```diff
@@ -509,7 +509,7 @@ def test_disk_performance_result_parsing(self):
         assert (benchmark.return_code == ReturnCode.SUCCESS)
         # bs + <read, write> x <iops, 95th, 99th, 99.9th>
-        assert (9 == len(benchmark.result.keys()))
+        assert (9 + benchmark.default_metric_count == len(benchmark.result.keys()))
         assert (1 == len(benchmark.result[jobname_prefix + '_bs']))
         assert (4096 == benchmark.result[jobname_prefix + '_bs'][0])
```
...
```diff
@@ -93,7 +93,7 @@ def test_gpcnet_network_test(self):
         --------------------------------------------------------------------------
         """
         assert (benchmark._process_raw_result(0, raw_output_no_execution))
-        assert (len(benchmark.result) == 0)
+        assert (len(benchmark.result) == benchmark.default_metric_count)

         # Check function process_raw_data.
         # Positive case - valid raw output.
@@ -250,7 +250,7 @@ def test_gpcnet_network_load(self):    # noqa: C901
         --------------------------------------------------------------------------
         """
         assert (benchmark._process_raw_result(0, raw_output_no_execution))
-        assert (len(benchmark.result) == 0)
+        assert (len(benchmark.result) == benchmark.default_metric_count)
         # Positive case - valid raw output.
         assert (benchmark._process_raw_result(0, raw_output))
         test_name = 'IsolatedNetworkTests'
```
...
```diff
@@ -112,8 +112,11 @@ def _test_gpu_copy_bw_performance_result_parsing(self, platform):
         assert (1 == len(benchmark.raw_data))
         print(test_raw_output.splitlines())
         test_raw_output_dict = {x.split()[0]: float(x.split()[1]) for x in test_raw_output.strip().splitlines()}
-        assert (len(test_raw_output_dict) == len(benchmark.result))
+        assert (len(test_raw_output_dict) + benchmark.default_metric_count == len(benchmark.result))
         for output_key in benchmark.result:
-            assert (len(benchmark.result[output_key]) == 1)
-            assert (isinstance(benchmark.result[output_key][0], numbers.Number))
-            assert (output_key in test_raw_output_dict)
+            if output_key == 'return_code':
+                assert (benchmark.result[output_key] == [0])
+            else:
+                assert (len(benchmark.result[output_key]) == 1)
+                assert (isinstance(benchmark.result[output_key][0], numbers.Number))
+                assert (output_key in test_raw_output_dict)
```
...
```diff
@@ -133,7 +133,7 @@ def test_tensorrt_inference_result_parsing(self, test_raw_log):
         self.assertTrue(benchmark._process_raw_result(0, test_raw_log))
         self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)
-        self.assertEqual(6, len(benchmark.result))
+        self.assertEqual(6 + benchmark.default_metric_count, len(benchmark.result))
         for tag in ['mean', '99']:
             self.assertEqual(0.5, benchmark.result[f'gpu_lat_ms_{tag}'][0])
             self.assertEqual(0.6, benchmark.result[f'host_lat_ms_{tag}'][0])
```
...
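In the serialized expectations below, `return_code` appears first inside `"result"` because it is inserted in the constructor before any benchmark metric, and Python dicts preserve insertion order when serialized. A minimal sketch of that ordering (plain dict and `json`, not superbench code):

```python
import json

result = {}
result['return_code'] = [0]  # seeded in BenchmarkResult.__init__
result['steptime_train_float32'] = [2.0]  # added later by the benchmark
# Insertion order carries through to the serialized string.
assert json.dumps(result) == '{"return_code": [0], "steptime_train_float32": [2.0]}'
```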
```diff
@@ -225,7 +225,7 @@ def test_train():
         '"start_time": null, "end_time": null, "raw_data": {'
         '"steptime_train_float32": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
         '"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
-        '"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}, '
+        '"result": {"return_code": [0], "steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}, '
         '"reduce_op": {"steptime_train_float32": "max", "throughput_train_float32": "min"}}'
     )
     assert (benchmark._preprocess())
@@ -236,7 +236,7 @@ def test_train():
     benchmark = create_benchmark('--num_steps 0')
     expected_result = (
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 3, '
-        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}, "reduce_op": {}}'
+        '"start_time": null, "end_time": null, "raw_data": {}, "result": {"return_code": [3]}, "reduce_op": {}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__train(Precision.FLOAT32) is False)
@@ -251,8 +251,8 @@ def test_inference():
         '"start_time": null, "end_time": null, "raw_data": {'
         '"steptime_inference_float16": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
         '"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
-        '"result": {"steptime_inference_float16": [4.0], "throughput_inference_float16": [8000.0]}, '
-        '"reduce_op": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
+        '"result": {"return_code": [0], "steptime_inference_float16": [4.0], "throughput_inference_float16": '
+        '[8000.0]}, "reduce_op": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16))
@@ -262,7 +262,7 @@ def test_inference():
     benchmark = create_benchmark('--num_steps 0')
     expected_result = (
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 3, '
-        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}, "reduce_op": {}}'
+        '"start_time": null, "end_time": null, "raw_data": {}, "result": {"return_code": [3]}, "reduce_op": {}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16) is False)
@@ -287,6 +287,7 @@ def test_benchmark():
     }
     assert (benchmark.raw_data == expected_raw_data)
     expected_result = {
+        'return_code': [0],
         'steptime_train_float32': [2.0],
         'throughput_train_float32': [16000.0],
         'steptime_train_float16': [2.0],
@@ -300,7 +301,7 @@ def test_benchmark():
         '"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
         '"steptime_train_float16": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
         '"throughput_train_float16": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
-        '"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
+        '"result": {"return_code": [0], "steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
         '"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}, '
         '"reduce_op": {"steptime_train_float32": "max", "throughput_train_float32": "min", '
         '"steptime_train_float16": "max", "throughput_train_float16": "min"}}'
@@ -337,7 +338,7 @@ def test_check_result_format():
     assert (benchmark._Benchmark__check_raw_data())

     # Negative case for __check_result_format() - change List[int] to List[str].
-    benchmark._result._BenchmarkResult__result = {'metric1': ['2.0']}
+    benchmark._result._BenchmarkResult__result = {'return_code': [0], 'metric1': ['2.0']}
     assert (benchmark._Benchmark__check_summarized_result() is False)
     # Negative case for __check_raw_data() - change List[List[int]] to List[List[str]].
```
...
```diff
@@ -139,7 +139,7 @@ def test_launch_benchmark():
     assert (benchmark.run_count == 1)
     assert (benchmark.return_code == ReturnCode.SUCCESS)
     assert (benchmark.raw_data == {'accumulation_result': ['1,3,6,10']})
-    assert (benchmark.result == {'accumulation_result': [10]})
+    assert (benchmark.result == {'return_code': [0], 'accumulation_result': [10]})

     # Replace the timestamp as null.
     result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
@@ -147,7 +147,7 @@ def test_launch_benchmark():
         '{"name": "accumulation", "type": "micro", "run_count": 1, '
         '"return_code": 0, "start_time": null, "end_time": null, '
         '"raw_data": {"accumulation_result": ["1,3,6,10"]}, '
-        '"result": {"accumulation_result": [10]}, '
+        '"result": {"return_code": [0], "accumulation_result": [10]}, '
         '"reduce_op": {"accumulation_result": null}}'
     )
     assert (result == expected)
@@ -163,7 +163,7 @@ def test_launch_benchmark():
     assert (benchmark.run_count == 1)
     assert (benchmark.return_code == ReturnCode.SUCCESS)
     assert (benchmark.raw_data == {'accumulation_result': ['1,3,6']})
-    assert (benchmark.result == {'accumulation_result': [6]})
+    assert (benchmark.result == {'return_code': [0], 'accumulation_result': [6]})

     # Replace the timestamp as null.
     result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
@@ -171,7 +171,7 @@ def test_launch_benchmark():
         '{"name": "accumulation", "type": "micro", "run_count": 1, '
         '"return_code": 0, "start_time": null, "end_time": null, '
         '"raw_data": {"accumulation_result": ["1,3,6"]}, '
-        '"result": {"accumulation_result": [6]}, '
+        '"result": {"return_code": [0], "accumulation_result": [6]}, '
         '"reduce_op": {"accumulation_result": null}}'
     )
     assert (result == expected)
```
...
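The `BenchmarkResult` unit tests below also switch from passing `BenchmarkType.*.value` and `ReturnCode.*.value` to passing the enum members themselves: since the constructor now calls `return_code.value` to seed the default metric, passing a raw integer would no longer work.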
```diff
@@ -9,27 +9,27 @@
 def test_add_raw_data():
     """Test interface BenchmarkResult.add_raw_data()."""
-    result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
     result.add_raw_data('metric1', 'raw log 1')
     result.add_raw_data('metric1', 'raw log 2')
     assert (result.raw_data['metric1'][0] == 'raw log 1')
     assert (result.raw_data['metric1'][1] == 'raw log 2')
-    assert (result.type == BenchmarkType.MICRO.value)
-    assert (result.return_code == ReturnCode.SUCCESS.value)
+    assert (result.type == BenchmarkType.MICRO)
+    assert (result.return_code == ReturnCode.SUCCESS)

-    result = BenchmarkResult('model', BenchmarkType.MODEL.value, ReturnCode.SUCCESS.value)
+    result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
     result.add_raw_data('metric1', [1, 2, 3])
     result.add_raw_data('metric1', [4, 5, 6])
     assert (result.raw_data['metric1'][0] == [1, 2, 3])
     assert (result.raw_data['metric1'][1] == [4, 5, 6])
-    assert (result.type == BenchmarkType.MODEL.value)
-    assert (result.return_code == ReturnCode.SUCCESS.value)
+    assert (result.type == BenchmarkType.MODEL)
+    assert (result.return_code == ReturnCode.SUCCESS)


 def test_add_result():
     """Test interface BenchmarkResult.add_result()."""
-    result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
     result.add_result('metric1', 300)
     result.add_result('metric1', 200)
     assert (result.result['metric1'][0] == 300)
@@ -38,7 +38,7 @@ def test_add_result():

 def test_set_timestamp():
     """Test interface BenchmarkResult.set_timestamp()."""
-    result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
     start_time = '2021-02-03 16:59:49'
     end_time = '2021-02-03 17:00:08'
     result.set_timestamp(start_time, end_time)
@@ -48,25 +48,28 @@ def test_set_timestamp():

 def test_set_benchmark_type():
     """Test interface BenchmarkResult.set_benchmark_type()."""
-    result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
-    result.set_benchmark_type(BenchmarkType.MICRO.value)
-    assert (result.type == BenchmarkType.MICRO.value)
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
+    result.set_benchmark_type(BenchmarkType.MICRO)
+    assert (result.type == BenchmarkType.MICRO)


 def test_set_return_code():
     """Test interface BenchmarkResult.set_return_code()."""
-    result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
-    assert (result.return_code == ReturnCode.SUCCESS.value)
-    result.set_return_code(ReturnCode.INVALID_ARGUMENT.value)
-    assert (result.return_code == ReturnCode.INVALID_ARGUMENT.value)
-    result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT.value)
-    assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT.value)
+    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
+    assert (result.return_code == ReturnCode.SUCCESS)
+    assert (result.result['return_code'] == [ReturnCode.SUCCESS.value])
+    result.set_return_code(ReturnCode.INVALID_ARGUMENT)
+    assert (result.return_code == ReturnCode.INVALID_ARGUMENT)
+    assert (result.result['return_code'] == [ReturnCode.INVALID_ARGUMENT.value])
+    result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
+    assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
+    assert (result.result['return_code'] == [ReturnCode.INVALID_BENCHMARK_RESULT.value])


 def test_serialize_deserialize():
     """Test serialization/deserialization and compare the results."""
     # Result with one metric.
-    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value, run_count=2)
+    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
     result.add_result('metric1', 300, ReduceType.MAX)
     result.add_result('metric1', 200, ReduceType.MAX)
     result.add_result('metric2', 100, ReduceType.AVG)
@@ -76,13 +79,13 @@ def test_serialize_deserialize():
     start_time = '2021-02-03 16:59:49'
     end_time = '2021-02-03 17:00:08'
     result.set_timestamp(start_time, end_time)
-    result.set_benchmark_type(BenchmarkType.MICRO.value)
+    result.set_benchmark_type(BenchmarkType.MICRO)

     expected = (
         '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
         '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
         '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
-        '"result": {"metric1": [300, 200], "metric2": [100]}, '
+        '"result": {"return_code": [0], "metric1": [300, 200], "metric2": [100]}, '
         '"reduce_op": {"metric1": "max", "metric2": "avg"}}'
     )
     assert (result.to_string() == expected)
```