Unverified commit e41b1f62, authored by guoshzhao, committed by GitHub

Benchmarks: Add Feature - Add reduce function support for output summary. (#147)

**Description**
Add reduce function support for output summary.

**Major Revision**
- Add reducer class to maintain all reduce functions.
- Save the reduce type of each metric into `BenchmarkResult`.
- Fix UT.
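For context, a minimal sketch of how the output-summary stage can consume the new reduce metadata; the `summarize` helper below is hypothetical and not part of this change:

```python
from superbench.benchmarks import ReduceType
from superbench.benchmarks.reducer import Reducer


def summarize(metric, values, reduce_type):
    """Hypothetical helper: reduce one metric's values for the output summary."""
    func = Reducer.get_reduce_func(reduce_type)
    # Unknown reduce types map to None, in which case the values pass through unreduced.
    return {metric: func(values) if func else values}


print(summarize('throughput_train_float32', [16000.0, 15800.0], ReduceType.MAX))
# {'throughput_train_float32': 16000.0}
```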
parent 86c390a9
@@ -8,6 +8,7 @@
 from superbench.benchmarks.return_code import ReturnCode
 from superbench.benchmarks.context import Platform, Framework, Precision, ModelAction, \
     DistributedImpl, DistributedBackend, BenchmarkType, BenchmarkContext
+from superbench.benchmarks.reducer import ReduceType
 from superbench.common.utils import LazyImport

 BenchmarkRegistry = LazyImport(
@@ -23,5 +24,5 @@
 __all__ = [
     'ReturnCode', 'Platform', 'Framework', 'BenchmarkType', 'Precision', 'ModelAction', 'DistributedImpl',
-    'DistributedBackend', 'BenchmarkContext', 'BenchmarkRegistry'
+    'DistributedBackend', 'BenchmarkContext', 'BenchmarkRegistry', 'ReduceType'
 ]
@@ -49,12 +49,13 @@ def _benchmark(self):
         """
         pass

-    def _process_numeric_result(self, metric, result):
+    def _process_numeric_result(self, metric, result, reduce_type=None):
         """Function to save the numerical results.

         Args:
             metric (str): metric name which is the key.
             result (List[numbers.Number]): numerical result.
+            reduce_type (ReduceType): The type of reduce function.

         Return:
             True if result list is not empty.
@@ -68,7 +69,7 @@ def _process_numeric_result(self, metric, result):
             return False

         self._result.add_raw_data(metric, result)
-        self._result.add_result(metric, sum(result) / len(result))
+        self._result.add_result(metric, sum(result) / len(result), reduce_type)

         return True

     def print_env_info(self):
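A sketch of how a concrete benchmark threads a reduce type through this path; `FakeBenchmark` and its metric are hypothetical, and the other abstract methods of `Benchmark` are omitted:

```python
from superbench.benchmarks.base import Benchmark
from superbench.benchmarks.reducer import ReduceType


class FakeBenchmark(Benchmark):  # hypothetical subclass for illustration only
    def _benchmark(self):
        kernel_times = [2.1, 2.0, 2.2]  # in a real benchmark, parsed from the workload output
        # Saves the raw list, stores the mean as the summarized value, and tags
        # the metric with the 'min' reduce type for the output-summary stage.
        return self._process_numeric_result('kernel_time', kernel_times, reduce_type=ReduceType.MIN)
```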
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""A module for result reducer."""

from typing import Dict, Callable
from statistics import mean

from superbench.benchmarks.context import Enum


class ReduceType(Enum):
    """The Enum class representing the different reduce types."""
    AVG = 'avg'
    MAX = 'max'
    MIN = 'min'
    SUM = 'sum'


class Reducer:
    """Reducer class to maintain all reduce functions."""
    functions: Dict[ReduceType, Callable] = dict()

    @classmethod
    def add_reduce_func(cls, reduce_type):
        """Add reduce function.

        Args:
            reduce_type (ReduceType): The type of reduce function.

        Return:
            decorator (Callable): the decorator that registers the reduce function.
        """
        def decorator(func):
            cls.functions[reduce_type] = func
            return func

        return decorator

    @classmethod
    def get_reduce_func(cls, reduce_type):
        """Get reduce function by reduce_type.

        Args:
            reduce_type (ReduceType): The type of reduce function.

        Return:
            func (Callable): the reduce function, or None if the reduce type is invalid.
        """
        if reduce_type in cls.functions:
            return cls.functions[reduce_type]
        return None


Reducer.add_reduce_func(ReduceType.MAX)(max)
Reducer.add_reduce_func(ReduceType.MIN)(min)
Reducer.add_reduce_func(ReduceType.SUM)(sum)
Reducer.add_reduce_func(ReduceType.AVG)(mean)
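A quick check of the lookup behavior, runnable as-is against the module above:

```python
from statistics import mean

from superbench.benchmarks.reducer import Reducer, ReduceType

assert Reducer.get_reduce_func(ReduceType.MAX)([1, 3, 2]) == 3
assert Reducer.get_reduce_func(ReduceType.AVG) is mean  # registered at import time
assert Reducer.get_reduce_func('not-a-reduce-type') is None  # unknown types fall through to None
```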
@@ -31,6 +31,7 @@ def __init__(self, name, type, return_code, run_count=0):
         self.__end_time = None
         self.__raw_data = dict()
         self.__result = dict()
+        self.__reduce = dict()

     def __eq__(self, rhs):
         """Override equal function for deep comparison.
@@ -67,7 +68,7 @@ def add_raw_data(self, metric, value):
         return True

-    def add_result(self, metric, value):
+    def add_result(self, metric, value, reduce_type=None):
         """Add summarized data into result.

         Args:
@@ -75,6 +76,7 @@ def add_result(self, metric, value):
             value (float): summarized data.
               For e2e model benchmarks, the value is step-time or throughput.
               For micro-benchmarks, the value is FLOPS, bandwidth and etc.
+            reduce_type (ReduceType): type of reduce function.

         Return:
             True if succeed to add the result.
@@ -87,6 +89,7 @@ def add_result(self, metric, value):
         if metric not in self.__result:
             self.__result[metric] = list()
+            self.__reduce[metric] = reduce_type.value if isinstance(reduce_type, Enum) else None
         self.__result[metric].append(value)

         return True
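The net effect on a result object, sketched with a hypothetical benchmark and metric name; the serialized `reduce` field mirrors the test expectations below:

```python
from superbench.benchmarks import BenchmarkType, ReturnCode, ReduceType
from superbench.benchmarks.result import BenchmarkResult

result = BenchmarkResult('demo-benchmark', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.add_result('latency', 2.0, ReduceType.MIN)
result.add_result('latency', 3.0, ReduceType.MIN)
# The reduce type is recorded once per metric, on first add, as its string value,
# so the serialized output contains "result": {"latency": [2.0, 3.0]} and
# "reduce": {"latency": "min"}; omitting reduce_type would yield null instead.
```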
......
@@ -197,7 +197,6 @@ def test_preprocess():
     --hidden_size int Hidden size.
     --seq_len int Sequence length."""
     )
-    print(settings)
     assert (settings == expected_settings)

     # Negative case for _preprocess() - invalid precision.
@@ -219,7 +218,8 @@ def test_train():
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
         '"start_time": null, "end_time": null, "raw_data": {"steptime_train_float32": [[2, 2, 2, 2, 2, 2, 2, 2]], '
         '"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
-        '"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}}'
+        '"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}, '
+        '"reduce": {"steptime_train_float32": null, "throughput_train_float32": null}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__train(Precision.FLOAT32))
@@ -229,7 +229,7 @@ def test_train():
     benchmark = create_benchmark('--num_steps 0')
     expected_result = (
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 3, '
-        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
+        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}, "reduce": {}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__train(Precision.FLOAT32) is False)
@@ -243,7 +243,8 @@ def test_inference():
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
         '"start_time": null, "end_time": null, "raw_data": {"steptime_inference_float16": [[4, 4, 4, 4, 4, 4, 4, 4]], '
         '"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
-        '"result": {"steptime_inference_float16": [4.0], "throughput_inference_float16": [8000.0]}}'
+        '"result": {"steptime_inference_float16": [4.0], "throughput_inference_float16": [8000.0]}, '
+        '"reduce": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16))
@@ -253,7 +254,7 @@ def test_inference():
     benchmark = create_benchmark('--num_steps 0')
     expected_result = (
         '{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 3, '
-        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
+        '"start_time": null, "end_time": null, "raw_data": {}, "result": {}, "reduce": {}}'
     )
     assert (benchmark._preprocess())
     assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16) is False)
@@ -292,7 +293,9 @@ def test_benchmark():
         '"steptime_train_float16": [[2, 2, 2, 2, 2, 2, 2, 2]], '
         '"throughput_train_float16": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
         '"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
-        '"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}}'
+        '"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}, '
+        '"reduce": {"steptime_train_float32": null, "throughput_train_float32": null, '
+        '"steptime_train_float16": null, "throughput_train_float16": null}}'
     )
     assert (benchmark.serialized_result == expected_serialized_result)
......
@@ -148,7 +148,8 @@ def test_launch_benchmark():
         '{"name": "accumulation", "type": "micro", "run_count": 1, '
         '"return_code": 0, "start_time": null, "end_time": null, '
         '"raw_data": {"accumulation_result": ["1,3,6,10"]}, '
-        '"result": {"accumulation_result": [10]}}'
+        '"result": {"accumulation_result": [10]}, '
+        '"reduce": {"accumulation_result": null}}'
     )
     assert (result == expected)
@@ -171,7 +172,8 @@ def test_launch_benchmark():
         '{"name": "accumulation", "type": "micro", "run_count": 1, '
         '"return_code": 0, "start_time": null, "end_time": null, '
         '"raw_data": {"accumulation_result": ["1,3,6"]}, '
-        '"result": {"accumulation_result": [6]}}'
+        '"result": {"accumulation_result": [6]}, '
+        '"reduce": {"accumulation_result": null}}'
     )
     assert (result == expected)
......
@@ -3,7 +3,7 @@
 """Tests for BenchmarkResult module."""

-from superbench.benchmarks import BenchmarkType, ReturnCode
+from superbench.benchmarks import BenchmarkType, ReturnCode, ReduceType
 from superbench.benchmarks.result import BenchmarkResult
@@ -67,9 +67,9 @@ def test_serialize_deserialize():
     """Test serialization/deserialization and compare the results."""
     # Result with one metric.
     result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value, run_count=2)
-    result.add_result('metric1', 300)
-    result.add_result('metric1', 200)
-    result.add_result('metric2', 100)
+    result.add_result('metric1', 300, ReduceType.MAX)
+    result.add_result('metric1', 200, ReduceType.MAX)
+    result.add_result('metric2', 100, ReduceType.AVG)
     result.add_raw_data('metric1', [1, 2, 3])
     result.add_raw_data('metric1', [4, 5, 6])
     result.add_raw_data('metric1', [7, 8, 9])
@@ -82,6 +82,7 @@ def test_serialize_deserialize():
         '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
         '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
         '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
-        '"result": {"metric1": [300, 200], "metric2": [100]}}'
+        '"result": {"metric1": [300, 200], "metric2": [100]}, '
+        '"reduce": {"metric1": "max", "metric2": "avg"}}'
     )
     assert (result.to_string() == expected)