"superbench/benchmarks/vscode:/vscode.git/clone" did not exist on "a9ef0f99ab68302d13cdde0fe1da020d18866ef8"
Unverified commit 2871a68b, authored by guoshzhao and committed by GitHub.
Browse files

Benchmarks: Code Revision - Revise result process interface and add result checking (#32)



* revise result process interface

* add more comments
Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
parent 0e2b2b08
...@@ -43,14 +43,17 @@ def _benchmark(self): ...@@ -43,14 +43,17 @@ def _benchmark(self):
"""Implementation for benchmarking.""" """Implementation for benchmarking."""
pass pass
def _process_docker_result(self, output): def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results. """Function to process raw results and save the summarized results.
Args: Args:
output (str): raw output string of the docker benchmark. raw_output (str): raw output string of the docker benchmark.
Return:
True if the raw output string is valid and result can be extracted.
""" """
# TODO: will implement it when add real benchmarks in the future. # TODO: will implement it when add real benchmarks in the future.
pass return True
def print_env_info(self): def print_env_info(self):
"""Print environments or dependencies information.""" """Print environments or dependencies information."""
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
from abc import abstractmethod from abc import abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark from superbench.benchmarks.base import Benchmark
...@@ -40,17 +41,48 @@ def _preprocess(self): ...@@ -40,17 +41,48 @@ def _preprocess(self):
@abstractmethod @abstractmethod
def _benchmark(self): def _benchmark(self):
"""Implementation for benchmarking.""" """Implementation for benchmarking.
Return:
True if run benchmark successfully.
"""
pass pass
def _process_micro_result(self, output): def _process_numeric_result(self, metric, result):
"""Function to save the numerical results.
Args:
metric (str): metric name which is the key.
result (List[numbers.Number]): numerical result.
Return:
True if result list is not empty.
"""
if len(result) == 0:
logger.error(
'Numerical result of benchmark is empty - round: {}, name: {}.'.format(
self._curr_run_index, self._name
)
)
return False
self._result.add_raw_data(metric, result)
self._result.add_result(metric, sum(result) / len(result))
return True
def _process_raw_result(self, raw_output):
"""Function to process raw results and save the summarized results. """Function to process raw results and save the summarized results.
self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
Args: Args:
output (str): raw output string of the micro-benchmark. raw_output (str): raw output string of the micro-benchmark.
Return:
True if the raw output string is valid and result can be extracted.
""" """
# TODO: will implement it when add real benchmarks in the future. # TODO: will implement it when add real benchmarks in the future.
pass return True
def print_env_info(self): def print_env_info(self):
"""Print environments or dependencies information.""" """Print environments or dependencies information."""
......
...@@ -229,22 +229,16 @@ def __train(self, precision): ...@@ -229,22 +229,16 @@ def __train(self, precision):
# The unit of step time should be millisecond. # The unit of step time should be millisecond.
step_times = self._train_step(precision) step_times = self._train_step(precision)
if len(step_times) == 0: if not self.__process_model_result(ModelAction.TRAIN, precision, step_times):
logger.error(
'Step time list for training is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False return False
average_time = sum(step_times) / len(step_times)
logger.info( logger.info(
'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format( 'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
) )
) )
self.__process_model_result(ModelAction.TRAIN, precision, step_times)
return True return True
def __inference(self, precision): def __inference(self, precision):
...@@ -259,22 +253,16 @@ def __inference(self, precision): ...@@ -259,22 +253,16 @@ def __inference(self, precision):
self._create_model(precision) self._create_model(precision)
# The unit of step time should be millisecond. # The unit of step time should be millisecond.
step_times = self._inference_step(precision) step_times = self._inference_step(precision)
if len(step_times) == 0: if not self.__process_model_result(ModelAction.INFERENCE, precision, step_times):
logger.error(
'Step time list for inference is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False return False
average_time = sum(step_times) / len(step_times)
logger.info( logger.info(
'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format( 'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time self._curr_run_index, self._name, precision,
sum(step_times) / len(step_times)
) )
) )
self.__process_model_result(ModelAction.INFERENCE, precision, step_times)
return True return True
@abstractmethod @abstractmethod
...@@ -361,8 +349,19 @@ def __process_model_result(self, model_action, precision, step_times): ...@@ -361,8 +349,19 @@ def __process_model_result(self, model_action, precision, step_times):
model_action (ModelAction): train or inference. model_action (ModelAction): train or inference.
precision (Precision): precision of model and input data, such as float32, float16. precision (Precision): precision of model and input data, such as float32, float16.
step_times (list): The step time list of every training/inference step, unit is millisecond. step_times (list): The step time list of every training/inference step, unit is millisecond.
Return:
True if step_times list is not empty.
""" """
metric = 'steptime_{}_{}'.format(model_action.value, precision.value) if len(step_times) == 0:
logger.error(
'Step time list is empty - round: {}, model: {}, model_action: {}, precision: {}.'.format(
self._curr_run_index, self._name, model_action, precision
)
)
return False
metric = 'steptime_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, step_times) self._result.add_raw_data(metric, step_times)
avg = sum(step_times) / len(step_times) avg = sum(step_times) / len(step_times)
self._result.add_result(metric, avg) self._result.add_result(metric, avg)
...@@ -370,11 +369,13 @@ def __process_model_result(self, model_action, precision, step_times): ...@@ -370,11 +369,13 @@ def __process_model_result(self, model_action, precision, step_times):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec. # The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second = 1000 millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times] throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
metric = 'throughput_{}_{}'.format(model_action.value, precision.value) metric = 'throughput_{}_{}'.format(model_action, precision)
self._result.add_raw_data(metric, throughput) self._result.add_raw_data(metric, throughput)
avg = sum(throughput) / len(throughput) avg = sum(throughput) / len(throughput)
self._result.add_result(metric, avg) self._result.add_result(metric, avg)
return True
@abstractmethod @abstractmethod
def _cal_params_count(self): def _cal_params_count(self):
"""Calculate the parameters scale of the model. """Calculate the parameters scale of the model.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment