"superbench/benchmarks/vscode:/vscode.git/clone" did not exist on "a9ef0f99ab68302d13cdde0fe1da020d18866ef8"
Unverified commit 2871a68b, authored by guoshzhao and committed by GitHub.
Browse files

Benchmarks: Code Revision - Revise result process interface and add result checking (#32)



* revise result process interface

* add more comments
Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
parent 0e2b2b08
...@@ -43,14 +43,17 @@ def _benchmark(self): ...@@ -43,14 +43,17 @@ def _benchmark(self):
"""Implementation for benchmarking.""" """Implementation for benchmarking."""
pass pass
def _process_docker_result(self, output): def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results. """Function to process raw results and save the summarized results.
Args: Args:
output (str): raw output string of the docker benchmark. raw_output (str): raw output string of the docker benchmark.
Return:
True if the raw output string is valid and result can be extracted.
""" """
# TODO: will implement it when add real benchmarks in the future. # TODO: will implement it when add real benchmarks in the future.
pass return True
def print_env_info(self): def print_env_info(self):
"""Print environments or dependencies information.""" """Print environments or dependencies information."""
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
from abc import abstractmethod from abc import abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark from superbench.benchmarks.base import Benchmark
...@@ -40,17 +41,48 @@ def _preprocess(self): ...@@ -40,17 +41,48 @@ def _preprocess(self):
@abstractmethod @abstractmethod
def _benchmark(self): def _benchmark(self):
"""Implementation for benchmarking.""" """Implementation for benchmarking.
Return:
True if run benchmark successfully.
"""
pass pass
def _process_micro_result(self, output): def _process_numeric_result(self, metric, result):
"""Function to save the numerical results.
Args:
metric (str): metric name which is the key.
result (List[numbers.Number]): numerical result.
Return:
True if result list is not empty.
"""
if len(result) == 0:
logger.error(
'Numerical result of benchmark is empty - round: {}, name: {}.'.format(
self._curr_run_index, self._name
)
)
return False
self._result.add_raw_data(metric, result)
self._result.add_result(metric, sum(result) / len(result))
return True
def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results. """Function to process raw results and save the summarized results.
self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
Args: Args:
output (str): raw output string of the micro-benchmark. raw_output (str): raw output string of the micro-benchmark.
Return:
True if the raw output string is valid and result can be extracted.
""" """
# TODO: will implement it when add real benchmarks in the future. # TODO: will implement it when add real benchmarks in the future.
pass return True
def print_env_info(self): def print_env_info(self):
"""Print environments or dependencies information.""" """Print environments or dependencies information."""
......
...@@ -229,22 +229,16 @@ def __train(self, precision): ...@@ -229,22 +229,16 @@ def __train(self, precision):
# The unit of step time should be millisecond. # The unit of step time should be millisecond.
step_times = self._train_step(precision) step_times = self._train_step(precision)
if len(step_times) == 0: if not self.__process_model_result(ModelAction.TRAIN, precision, step_times):
logger.error(
'Step time list for training is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False return False
average_time = sum(step_times) / len(step_times)
logger.info( logger.info(
'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format( 'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
) )
) )
self.__process_model_result(ModelAction.TRAIN, precision, step_times)
return True return True
def __inference(self, precision): def __inference(self, precision):
...@@ -259,22 +253,16 @@ def __inference(self, precision): ...@@ -259,22 +253,16 @@ def __inference(self, precision):
self._create_model(precision) self._create_model(precision)
# The unit of step time should be millisecond. # The unit of step time should be millisecond.
step_times = self._inference_step(precision) step_times = self._inference_step(precision)
if len(step_times) == 0: if not self.__process_model_result(ModelAction.INFERENCE, precision, step_times):
logger.error(
'Step time list for inference is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False return False
average_time = sum(step_times) / len(step_times)
logger.info( logger.info(
'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format( 'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
) )
) )
self.__process_model_result(ModelAction.INFERENCE, precision, step_times)
return True return True
@abstractmethod @abstractmethod
...@@ -361,8 +349,19 @@ def __process_model_result(self, model_action, precision, step_times): ...@@ -361,8 +349,19 @@ def __process_model_result(self, model_action, precision, step_times):
model_action (ModelAction): train or inference. model_action (ModelAction): train or inference.
precision (Precision): precision of model and input data, such as float32, float16. precision (Precision): precision of model and input data, such as float32, float16.
step_times (list): The step time list of every training/inference step, unit is millisecond. step_times (list): The step time list of every training/inference step, unit is millisecond.
Return:
True if step_times list is not empty.
""" """
metric = 'steptime_{}_{}'.format(model_action.value, precision.value) if len(step_times) == 0:
logger.error(
'Step time list is empty - round: {}, model: {}, model_action: {}, precision: {}.'.format(
self._curr_run_index, self._name, model_action, precision
)
)
return False
metric = 'steptime_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, step_times) self._result.add_raw_data(metric, step_times)
avg = sum(step_times) / len(step_times) avg = sum(step_times) / len(step_times)
self._result.add_result(metric, avg) self._result.add_result(metric, avg)
...@@ -370,11 +369,13 @@ def __process_model_result(self, model_action, precision, step_times): ...@@ -370,11 +369,13 @@ def __process_model_result(self, model_action, precision, step_times):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec. # The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second = 1000 millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times] throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
metric = 'throughput_{}_{}'.format(model_action.value, precision.value) metric = 'throughput_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, throughput) self._result.add_raw_data(metric, throughput)
avg = sum(throughput) / len(throughput) avg = sum(throughput) / len(throughput)
self._result.add_result(metric, avg) self._result.add_result(metric, avg)
return True
@abstractmethod @abstractmethod
def _cal_params_count(self): def _cal_params_count(self):
"""Calculate the parameters scale of the model. """Calculate the parameters scale of the model.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment