Unverified Commit 37d5dfd5 authored by guoshzhao, committed by GitHub

Benchmarks: Code Revision - revise the DockerBenchmark base class (#179)

**Description**
Revise the DockerBenchmark base class to support image pull, image removal, and related container operations.

**Major Revision**
- Pull the docker image in _preprocess()
- Clean up the container and image in _postprocess()
- Execute customized commands in _benchmark()
- Add unit tests
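
For context, a minimal sketch of how a concrete docker-benchmark could plug into the revised base class. The `ExampleDockerBenchmark` class, the image uri, the container name, and the `./run_example` command are illustrative placeholders, not part of this change:

```python
from superbench.benchmarks.docker_benchmarks import DockerBenchmark


class ExampleDockerBenchmark(DockerBenchmark):
    """Hypothetical subclass showing how the revised base class is meant to be used."""
    def __init__(self, name, parameters=''):
        super().__init__(name, parameters)
        # Placeholder image/container names - a real benchmark would set its own.
        self._image_uri = 'example.azurecr.io/superbench/example:latest'
        self._container_name = 'example-benchmark'

    def _preprocess(self):
        # The base class validates the settings above and pulls the image here.
        if not super()._preprocess():
            return False
        # Commands queued here are executed one by one by the base _benchmark().
        self._commands.append(
            'docker run --rm --name={} {} ./run_example'.format(self._container_name, self._image_uri)
        )
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        # A real benchmark would parse raw_output and record metrics via add_result().
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
        return True
```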
parent 115cd2e6
@@ -5,7 +5,8 @@
 from abc import abstractmethod

-from superbench.benchmarks import BenchmarkType
+from superbench.common.utils import logger, run_command
+from superbench.benchmarks import BenchmarkType, ReturnCode
 from superbench.benchmarks.base import Benchmark

@@ -20,8 +21,15 @@ def __init__(self, name, parameters=''):
         """
         super().__init__(name, parameters)
         self._benchmark_type = BenchmarkType.DOCKER

         # Command lines to launch the docker image and run the benchmarks inside docker.
-        self.__commands = list()
+        self._commands = list()
+
+        # Image uri of the current docker-benchmark.
+        self._image_uri = None
+
+        # Container name of the current docker-benchmark.
+        self._container_name = None

     '''
     # If need to add new arguments, super().add_parser_arguments() must be called.

@@ -36,24 +44,89 @@ def _preprocess(self):
         Return:
             True if _preprocess() succeed.
         """
-        return super()._preprocess()
+        if not super()._preprocess():
+            return False
+
+        if self._image_uri is None:
+            self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_IMAGE_NOT_SET)
+            logger.error('The image uri is not set - benchmark: {}.'.format(self._name))
+            return False
+
+        if self._container_name is None:
+            self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_CONTAINER_NOT_SET)
+            logger.error('The container name is not set - benchmark: {}.'.format(self._name))
+            return False
+
+        output = run_command('docker pull --quiet {}'.format(self._image_uri))
+        if output.returncode != 0:
+            self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_IMAGE_PULL_FAILURE)
+            logger.error(
+                'DockerBenchmark pull image failed - benchmark: {}, error message: {}.'.format(
+                    self._name, output.stdout
+                )
+            )
+            return False
+
+        return True
+
+    def _postprocess(self):
+        """Postprocess/cleanup operations after the benchmarking.
+
+        Return:
+            True if _postprocess() succeed.
+        """
+        rm_containers = 'docker stop --time 20 {container} && docker rm {container}'.format(
+            container=self._container_name
+        )
+        run_command(rm_containers)
+
+        rm_image = 'docker rmi {}'.format(self._image_uri)
+        run_command(rm_image)
+
+        return True
+
-    @abstractmethod
     def _benchmark(self):
-        """Implementation for benchmarking."""
-        pass
+        """Implementation for benchmarking.
+
+        Return:
+            True if run benchmark successfully.
+        """
+        for cmd_idx in range(len(self._commands)):
+            logger.info(
+                'Execute command - round: {}, benchmark: {}, command: {}.'.format(
+                    self._curr_run_index, self._name, self._commands[cmd_idx]
+                )
+            )
+            output = run_command(self._commands[cmd_idx])
+            if output.returncode != 0:
+                self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_EXECUTION_FAILURE)
+                logger.error(
+                    'DockerBenchmark execution failed - round: {}, benchmark: {}, error message: {}.'.format(
+                        self._curr_run_index, self._name, output.stdout
+                    )
+                )
+                return False
+            else:
+                if not self._process_raw_result(cmd_idx, output.stdout):
+                    self._result.set_return_code(ReturnCode.DOCKERBENCHMARK_RESULT_PARSING_FAILURE)
+                    return False
+
+        return True

-    def _process_raw_result(self, raw_output):
+    @abstractmethod
+    def _process_raw_result(self, cmd_idx, raw_output):
         """Function to process raw results and save the summarized results.
+
+        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
+
         Args:
-            raw_output (str): raw output string of the docker benchmark.
+            cmd_idx (int): the index of command corresponding with the raw_output.
+            raw_output (str): raw output string of the docker-benchmark.
+
         Return:
             True if the raw output string is valid and result can be extracted.
         """
-        # TODO: will implement it when add real benchmarks in the future.
-        return True
+        pass

     def print_env_info(self):
         """Print environments or dependencies information."""
@@ -14,7 +14,7 @@ class ReturnCode(Enum):
     INVALID_BENCHMARK_TYPE = 2
     INVALID_BENCHMARK_RESULT = 3
     RUNTIME_EXCEPTION_ERROR = 4
-    # Return codes related with model benchmarks.
+    # Return codes related to model benchmarks.
     NO_SUPPORTED_PRECISION = 10
     DISTRIBUTED_SETTING_INIT_FAILURE = 13
     DISTRIBUTED_SETTING_DESTROY_FAILURE = 14
@@ -22,10 +22,16 @@ class ReturnCode(Enum):
     DATALOADER_INIT_FAILURE = 16
     OPTIMIZER_CREATION_FAILURE = 17
     MODEL_CREATION_FAILURE = 18
-    # Return codes related with micro benchmarks.
+    # Return codes related to micro benchmarks.
     MICROBENCHMARK_BINARY_NAME_NOT_SET = 30
     MICROBENCHMARK_BINARY_NOT_EXIST = 31
     MICROBENCHMARK_EXECUTION_FAILURE = 32
     MICROBENCHMARK_RESULT_PARSING_FAILURE = 33
     MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE = 34
     MICROBENCHMARK_DEVICE_GETTING_FAILURE = 35
+    # Return codes related to docker benchmarks.
+    DOCKERBENCHMARK_IMAGE_NOT_SET = 50
+    DOCKERBENCHMARK_CONTAINER_NOT_SET = 51
+    DOCKERBENCHMARK_IMAGE_PULL_FAILURE = 52
+    DOCKERBENCHMARK_EXECUTION_FAILURE = 53
+    DOCKERBENCHMARK_RESULT_PARSING_FAILURE = 54
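
For illustration only, a small sketch of how downstream code could group the new docker-specific return codes; the `DOCKER_FAILURE_CODES` set and `is_docker_failure` helper are hypothetical examples, not something added by this commit:

```python
from superbench.benchmarks import ReturnCode

# Hypothetical grouping of the docker-benchmark failure codes introduced above.
DOCKER_FAILURE_CODES = {
    ReturnCode.DOCKERBENCHMARK_IMAGE_NOT_SET,
    ReturnCode.DOCKERBENCHMARK_CONTAINER_NOT_SET,
    ReturnCode.DOCKERBENCHMARK_IMAGE_PULL_FAILURE,
    ReturnCode.DOCKERBENCHMARK_EXECUTION_FAILURE,
    ReturnCode.DOCKERBENCHMARK_RESULT_PARSING_FAILURE,
}


def is_docker_failure(benchmark):
    """Return True if a finished docker-benchmark ended with a docker-specific failure code."""
    return benchmark.return_code in DOCKER_FAILURE_CODES
```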
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for DockerBenchmark modules."""

import re

from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.docker_benchmarks import DockerBenchmark


class FakeDockerBenchmark(DockerBenchmark):
    """Fake benchmark inherit from DockerBenchmark."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name: benchmark name.
            parameters: benchmark parameters.
        """
        super().__init__(name, parameters)

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the docker-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        pattern = r'\d+\.\d+'
        result = re.findall(pattern, raw_output)
        if len(result) != 2:
            return False

        try:
            result = [float(item) for item in result]
        except BaseException:
            return False

        self._result.add_result('cost1', result[0])
        self._result.add_result('cost2', result[1])

        return True
def test_docker_benchmark_base():
    """Test DockerBenchmark base class."""
    # Negative case - DOCKERBENCHMARK_IMAGE_NOT_SET.
    benchmark = FakeDockerBenchmark('fake')
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark.run() is False)
    assert (benchmark.return_code == ReturnCode.DOCKERBENCHMARK_IMAGE_NOT_SET)

    # Negative case - DOCKERBENCHMARK_CONTAINER_NOT_SET.
    benchmark = FakeDockerBenchmark('fake')
    benchmark._image_uri = 'image'
    assert (benchmark.run() is False)
    assert (benchmark.return_code == ReturnCode.DOCKERBENCHMARK_CONTAINER_NOT_SET)

    # Negative case - DOCKERBENCHMARK_IMAGE_PULL_FAILURE.
    benchmark = FakeDockerBenchmark('fake')
    benchmark._image_uri = 'image'
    benchmark._container_name = 'container'
    assert (benchmark.run() is False)
    assert (benchmark.return_code == ReturnCode.DOCKERBENCHMARK_IMAGE_PULL_FAILURE)

    # Test for DockerBenchmark._benchmark().
    benchmark._commands.append("echo -n 'cost1: 10.2, cost2: 20.2'")
    benchmark._benchmark()
    assert (benchmark.raw_data['raw_output_0'] == ['cost1: 10.2, cost2: 20.2'])
    assert (benchmark.result['cost1'] == [10.2])
    assert (benchmark.result['cost2'] == [20.2])