Unverified Commit 2871a68b authored by guoshzhao, committed by GitHub

Benchmarks: Code Revision - Revise result process interface and add result checking (#32)



* revise result process interface

* add more comments
Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
parent 0e2b2b08
@@ -43,14 +43,17 @@ def _benchmark(self):
"""Implementation for benchmarking."""
pass
def _process_docker_result(self, output):
def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results.
Args:
output (str): raw output string of the docker benchmark.
raw_output (str): raw output string of the docker benchmark.
Return:
True if the raw output string is valid and the result can be extracted.
"""
# TODO: will implement it when adding real benchmarks in the future.
pass
return True
def print_env_info(self):
"""Print environments or dependencies information."""
......
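For context, the renamed _process_raw_result() hook is now expected to report whether the raw docker output could be parsed. Below is a minimal, self-contained sketch of that contract; the class, metric name, and output format are illustrative assumptions, not part of this commit or of superbench itself.

# Self-contained sketch (not the superbench implementation): a stand-in class
# showing the contract that _process_raw_result() now returns a bool instead of
# silently passing. All names and the output format are illustrative.
class FakeDockerBenchmark:
    def __init__(self, name):
        self._name = name
        self._raw_data = {}    # stands in for self._result.add_raw_data()
        self._results = {}     # stands in for self._result.add_result()

    def _process_raw_result(self, raw_output):
        """Return True if the raw output is valid and a result can be extracted."""
        lines = [line.strip() for line in raw_output.splitlines() if line.strip()]
        try:
            # Assume the container prints one latency value in milliseconds per line.
            latencies = [float(line) for line in lines]
        except ValueError:
            return False
        if not latencies:
            return False
        self._raw_data['latency'] = latencies
        self._results['latency'] = sum(latencies) / len(latencies)
        return True

# Usage: invalid output is now rejected instead of being ignored.
bench = FakeDockerBenchmark('demo')
assert bench._process_raw_result('1.2\n1.4\n1.3\n') is True
assert bench._process_raw_result('container crashed') is False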
@@ -5,6 +5,7 @@
from abc import abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark
@@ -40,17 +41,48 @@ def _preprocess(self):
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
"""Implementation for benchmarking.
Return:
True if the benchmark runs successfully.
"""
pass
def _process_micro_result(self, output):
def _process_numeric_result(self, metric, result):
"""Function to save the numerical results.
Args:
metric (str): metric name which is the key.
result (List[numbers.Number]): numerical result.
Return:
True if the result list is not empty.
"""
if len(result) == 0:
logger.error(
'Numerical result of benchmark is empty - round: {}, name: {}.'.format(
self._curr_run_index, self._name
)
)
return False
self._result.add_raw_data(metric, result)
self._result.add_result(metric, sum(result) / len(result))
return True
def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results.
self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
Args:
output (str): raw output string of the micro-benchmark.
raw_output (str): raw output string of the micro-benchmark.
Return:
True if the raw output string is valid and the result can be extracted.
"""
# TODO: will implement it when adding real benchmarks in the future.
pass
return True
def print_env_info(self):
"""Print environments or dependencies information."""
......
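To show how the new _process_numeric_result() helper is meant to be used, here is a self-contained sketch of a micro-benchmark whose _process_raw_result() parses numbers out of the raw output and delegates the checking and averaging to the helper. The parsing format, metric name, and class are assumptions for illustration only, not superbench code.

# Self-contained sketch (not the superbench implementation) mirroring the
# checking logic added above: empty results are rejected, otherwise the raw
# list and its mean are stored under the metric name.
class FakeMicroBenchmark:
    def __init__(self, name):
        self._name = name
        self._curr_run_index = 0
        self._raw_data = {}    # stands in for self._result.add_raw_data()
        self._results = {}     # stands in for self._result.add_result()

    def _process_numeric_result(self, metric, result):
        """Save raw values and their mean; return False for an empty list."""
        if len(result) == 0:
            print(
                'Numerical result of benchmark is empty - round: {}, name: {}.'.format(
                    self._curr_run_index, self._name
                )
            )
            return False
        self._raw_data[metric] = result
        self._results[metric] = sum(result) / len(result)
        return True

    def _process_raw_result(self, raw_output):
        """Assume output lines look like 'bandwidth: <value>'; extract the numbers."""
        values = []
        for line in raw_output.splitlines():
            if line.startswith('bandwidth:'):
                values.append(float(line.split(':', 1)[1]))
        return self._process_numeric_result('bandwidth', values)

# Usage: two valid lines give an average of 12.0; output without numbers returns False.
bench = FakeMicroBenchmark('demo')
assert bench._process_raw_result('bandwidth: 11.5\nbandwidth: 12.5\n') is True
assert bench._results['bandwidth'] == 12.0
assert bench._process_raw_result('no numbers here') is False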
@@ -229,22 +229,16 @@ def __train(self, precision):
# The unit of step time should be millisecond.
step_times = self._train_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for training is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
if not self.__process_model_result(ModelAction.TRAIN, precision, step_times):
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
)
)
self.__process_model_result(ModelAction.TRAIN, precision, step_times)
return True
def __inference(self, precision):
@@ -259,22 +253,16 @@ def __inference(self, precision):
self._create_model(precision)
# The unit of step time should be millisecond.
step_times = self._inference_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for inference is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
if not self.__process_model_result(ModelAction.INFERENCE, precision, step_times):
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
)
)
self.__process_model_result(ModelAction.INFERENCE, precision, step_times)
return True
@abstractmethod
@@ -361,8 +349,19 @@ def __process_model_result(self, model_action, precision, step_times):
model_action (ModelAction): train or inference.
precision (Precision): precision of model and input data, such as float32, float16.
step_times (list): The step time of every training/inference step, in milliseconds.
Return:
True if the step_times list is not empty.
"""
metric = 'steptime_{}_{}'.format(model_action.value, precision.value)
if len(step_times) == 0:
logger.error(
'Step time list is empty - round: {}, model: {}, model_action: {}, precision: {}.'.format(
self._curr_run_index, self._name, model_action, precision
)
)
return False
metric = 'steptime_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, step_times)
avg = sum(step_times) / len(step_times)
self._result.add_result(metric, avg)
@@ -370,11 +369,13 @@ def __process_model_result(self, model_action, precision, step_times):
# The unit of step time is milliseconds; use it to calculate the throughput in samples/sec.
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
metric = 'throughput_{}_{}'.format(model_action.value, precision.value)
metric = 'throughput_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, throughput)
avg = sum(throughput) / len(throughput)
self._result.add_result(metric, avg)
return True
@abstractmethod
def _cal_params_count(self):
"""Calculate the parameters scale of the model.
......
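As a quick sanity check of the throughput conversion used in __process_model_result() above, the arithmetic below works through a made-up batch size and step-time list; the printed metric names only follow the steptime_<action>_<precision> / throughput_<action>_<precision> pattern and are not taken from a real run.

# Worked example of the conversion used above:
# throughput (samples/sec) = 1000 ms/s / step_time_ms * batch_size.
batch_size = 32
step_times = [20.0, 25.0, 25.0]    # milliseconds, made-up values
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * batch_size for step_time in step_times]

avg_step_time = sum(step_times) / len(step_times)      # 70 / 3 = 23.333... ms
avg_throughput = sum(throughput) / len(throughput)     # (1600 + 1280 + 1280) / 3 = 1386.67 samples/sec

print('steptime_train_float32: {:.3f} ms'.format(avg_step_time))
print('throughput_train_float32: {:.2f} samples/sec'.format(avg_throughput))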