Commit 4c87a3e4 authored by guoshzhao, committed by GitHub

Benchmarks: Initialization - Add base class, registry, and result (#1)

* benchmarks init.

Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>

parent d32b96eb
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exposes interfaces of benchmarks used by SuperBench executor."""
from .return_code import ReturnCode
from .context import Platform, Framework, Precision, ModelAction, BenchmarkType, BenchmarkContext
from .registry import BenchmarkRegistry
__all__ = [
'ReturnCode', 'Platform', 'Framework', 'BenchmarkType', 'Precision', 'ModelAction', 'BenchmarkContext',
'BenchmarkRegistry'
]
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the base class."""
import argparse
import numbers
from datetime import datetime
from abc import ABC, abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult
class Benchmark(ABC):
"""The base class of all benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
self._name = name
self._argv = list(filter(None, parameters.split(' ')))
self._benchmark_type = None
self._parser = argparse.ArgumentParser(
add_help=False,
usage=argparse.SUPPRESS,
allow_abbrev=False,
formatter_class=argparse.MetavarTypeHelpFormatter
)
self._args = None
self._curr_run_index = 0
self._result = None
def add_parser_arguments(self):
"""Add the specified arguments."""
self._parser.add_argument(
'--run_count',
type=int,
default=1,
required=False,
help='The run count of benchmark.',
)
self._parser.add_argument(
'--duration',
type=int,
default=0,
required=False,
help='The elapsed time of benchmark in seconds.',
)
def get_configurable_settings(self):
"""Get all the configurable settings.
Return:
All configurable settings in raw string.
"""
return self._parser.format_help().strip()
def parse_args(self):
"""Parse the arguments.
Return:
ret (bool): whether parsing succeeded or not.
args (argparse.Namespace): parsed arguments.
unknown (list): unknown arguments.
"""
try:
args, unknown = self._parser.parse_known_args(self._argv)
except BaseException as e:
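# Note: argparse raises SystemExit (a BaseException, not an Exception)
# when parsing fails, so the broad BaseException catch here is intentional.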
logger.error('Invalid argument - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False, None, None
if len(unknown) > 0:
logger.warning(
'Benchmark has unknown arguments - benchmark: {}, unknown arguments: {}'.format(
self._name, ' '.join(unknown)
)
)
return True, args, unknown
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
self.add_parser_arguments()
ret, self._args, unknown = self.parse_args()
if not ret:
self._result = BenchmarkResult(self._name, self._benchmark_type, ReturnCode.INVALID_ARGUMENT)
return False
self._result = BenchmarkResult(
self._name, self._benchmark_type, ReturnCode.SUCCESS, run_count=self._args.run_count
)
if not isinstance(self._benchmark_type, BenchmarkType):
logger.error(
'Invalid benchmark type - benchmark: {}, type: {}'.format(self._name, type(self._benchmark_type))
)
self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
return False
return True
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def run(self):
"""Function to launch the benchmarking.
Return:
True if the benchmark runs successfully.
"""
if not self._preprocess():
return False
self._start_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
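# Bind the loop variable to the instance so that _benchmark() and its
# helpers can reference the current run index when logging results.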
for self._curr_run_index in range(self._args.run_count):
if not self._benchmark():
return False
self._end_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
self._result.set_timestamp(self._start_time, self._end_time)
if not self.__check_result_format():
return False
return True
def __check_result_format(self):
"""Check the validation of result object.
Return:
True if the result is valid.
"""
if (not self.__check_result_type()) or (not self.__check_summarized_result()) or (not self.__check_raw_data()):
self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
return False
return True
def __check_result_type(self):
"""Check the type of result object.
Return:
True if the result is instance of BenchmarkResult.
"""
if not isinstance(self._result, BenchmarkResult):
logger.error(
'Invalid benchmark result type - benchmark: {}, type: {}'.format(self._name, type(self._result))
)
return False
return True
def __check_summarized_result(self):
"""Check the validation of summary result.
Return:
True if the summary result is instance of List[Number].
"""
for metric in self._result.result:
is_valid = isinstance(self._result.result[metric], list)
if is_valid:
for value in self._result.result[metric]:
if not isinstance(value, numbers.Number):
is_valid = False
break
if not is_valid:
logger.error(
'Invalid summarized result - benchmark: {}, metric name: {}, expect: List[Number], got: {}.'.format(
self._name, metric, type(self._result.result[metric])
)
)
return False
return True
def __check_raw_data(self):
"""Check the validation of raw data.
Return:
True if the raw data is:
instance of List[List[Number]] for BenchmarkType.MODEL, and BenchmarkType.DOCKER.
instance of List[str] for BenchmarkType.MICRO.
"""
for metric in self._result.raw_data:
is_valid = isinstance(self._result.raw_data[metric], list)
if is_valid:
for run in self._result.raw_data[metric]:
if self._benchmark_type in [BenchmarkType.MODEL, BenchmarkType.DOCKER]:
if not isinstance(run, list):
is_valid = False
break
for value in run:
if not isinstance(value, numbers.Number):
is_valid = False
break
elif self._benchmark_type in [BenchmarkType.MICRO]:
is_valid = isinstance(run, str)
if not is_valid:
logger.error(
'Invalid raw data - benchmark: {}, metric name: {}, expect: {}, got: {}.'.format(
self._name, metric,
'List[str]' if self._benchmark_type == BenchmarkType.MICRO else 'List[List[Number]]',
type(self._result.raw_data[metric])
)
)
return False
return True
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
@property
def name(self):
"""Decoration function to access benchmark name."""
return self._result.name
@property
def type(self):
"""Decoration function to access benchmark type."""
return self._result.type
@property
def run_count(self):
"""Decoration function to access benchmark run_count."""
return self._result.run_count
@property
def return_code(self):
"""Decoration function to access benchmark return_code."""
return self._result.return_code
@property
def start_time(self):
"""Decoration function to access benchmark start_time."""
return self._result.start_time
@property
def end_time(self):
"""Decoration function to access benchmark end_time."""
return self._result.end_time
@property
def raw_data(self):
"""Decoration function to access benchmark raw_data."""
return self._result.raw_data
@property
def result(self):
"""Decoration function to access benchmark result."""
return self._result.result
@property
def serialized_result(self):
"""Decoration function to access benchmark result."""
return self._result.to_string()
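'''
# A minimal usage sketch (illustrative only, not part of this commit).
# 'DemoBenchmark' is a hypothetical subclass that fills in the abstract
# _benchmark() hook and records one metric per run.
class DemoBenchmark(Benchmark):
    def __init__(self, name, parameters=''):
        super().__init__(name, parameters)
        self._benchmark_type = BenchmarkType.MICRO

    def _benchmark(self):
        # Raw data for micro-benchmarks is expected to be a string.
        self._result.add_raw_data('demo_metric', 'raw output of one run')
        self._result.add_result('demo_metric', 42)
        return True

benchmark = DemoBenchmark('demo', parameters='--run_count=2')
if benchmark.run():
    print(benchmark.serialized_result)
'''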
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified context of benchmarks."""
import enum
class Enum(enum.Enum):
"""Customized Enum class."""
@classmethod
def get_values(cls):
"""Return the value list."""
values = [item.value for item in cls]
return values
def __str__(self):
"""Value as the string."""
return self.value
class Platform(Enum):
"""The Enum class representing different platforms."""
CPU = 'CPU'
CUDA = 'CUDA'
ROCM = 'ROCm'
class Framework(Enum):
"""The Enum class representing different frameworks."""
ONNX = 'onnx'
PYTORCH = 'pytorch'
TENSORFLOW1 = 'tf1'
TENSORFLOW2 = 'tf2'
NONE = 'none'
class BenchmarkType(Enum):
"""The Enum class representing different types of benchmarks."""
MODEL = 'model'
MICRO = 'micro'
DOCKER = 'docker'
class Precision(Enum):
"""The Enum class representing different data precisions."""
FLOAT16 = 'float16'
FLOAT32 = 'float32'
FLOAT64 = 'float64'
BFLOAT16 = 'bfloat16'
UINT8 = 'uint8'
INT8 = 'int8'
INT16 = 'int16'
INT32 = 'int32'
INT64 = 'int64'
class ModelAction(Enum):
"""The Enum class representing different model process."""
TRAIN = 'train'
INFERENCE = 'inference'
class BenchmarkContext():
"""Context class of all benchmarks.
Containing all information to launch one benchmark.
"""
def __init__(self, name, platform, parameters='', framework=Framework.NONE):
"""Constructor.
Args:
name (str): name of benchmark in config file.
platform (Platform): Platform types like CUDA, ROCM.
parameters (str): predefined parameters of benchmark.
framework (Framework): Framework types like ONNX, PYTORCH.
"""
self.__name = name
self.__platform = platform
self.__parameters = parameters
self.__framework = framework
@property
def name(self):
"""Decoration function to access __name."""
return self.__name
@property
def platform(self):
"""Decoration function to access __platform."""
return self.__platform
@property
def parameters(self):
"""Decoration function to access __parameters."""
return self.__parameters
@property
def framework(self):
"""Decoration function to access __framework."""
return self.__framework
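'''
# Illustrative only: constructing a context for a hypothetical benchmark.
context = BenchmarkContext(
    'bert-large', Platform.CUDA, parameters='--batch_size=8', framework=Framework.PYTORCH
)
assert context.framework is Framework.PYTORCH
'''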
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the benchmarks packaged in docker."""
from .docker_base import DockerBenchmark
__all__ = ['DockerBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the docker-benchmark base class."""
from abc import abstractmethod
from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark
class DockerBenchmark(Benchmark):
"""The base class of benchmarks packaged in docker container."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.DOCKER
# Command lines to launch the docker image and run the benchmarks inside docker.
self.__commands = list()
'''
# If new arguments need to be added, super().add_parser_arguments() must be called.
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
'''
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
return super()._preprocess()
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def _process_docker_result(self, output):
"""Function to process raw results and save the summarized results.
Args:
output (str): raw output string of the docker benchmark.
"""
# TODO: will implement it when add real benchmarks in the future.
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the micro-benchmarks."""
from .micro_base import MicroBenchmark
__all__ = ['MicroBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the micro-benchmark base class."""
from abc import abstractmethod
from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark
class MicroBenchmark(Benchmark):
"""The base class of micro-benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.MICRO
# Command lines to launch the micro-benchmarks.
self.__commands = list()
'''
# If new arguments need to be added, super().add_parser_arguments() must be called.
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
'''
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
return super()._preprocess()
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def _process_micro_result(self, output):
"""Function to process raw results and save the summarized results.
Args:
output (str): raw output string of the micro-benchmark.
"""
# TODO: will implement it when add real benchmarks in the future.
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the e2e model related benchmarks."""
from .model_base import ModelBenchmark
__all__ = ['ModelBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the model-benchmark base class."""
from abc import abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import Precision, ModelAction, BenchmarkType, ReturnCode
from superbench.benchmarks.base import Benchmark
from superbench.benchmarks.context import Enum
class DistributedImpl(Enum):
"""The Enum class representing different distributed implementations."""
DDP = 'ddp'
MIRRORED = 'mirrored'
MW_MIRRORED = 'multiworkermirrored'
PS = 'parameterserver'
HOROVOD = 'horovod'
class DistributedBackend(Enum):
"""The Enum class representing different distributed backends."""
NCCL = 'nccl'
MPI = 'mpi'
GLOO = 'gloo'
class ModelBenchmark(Benchmark):
"""The base class of E2E model benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.MODEL
self._world_size = None
self._dataset = None
self._dataloader = None
self._model = None
self._optimizer = None
self._loss_fn = None
self._target = None
self._supported_precision = []
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--num_warmup',
type=int,
default=64,
required=False,
help='The number of warmup steps.',
)
self._parser.add_argument(
'--num_steps',
type=int,
default=2048,
required=False,
help='The number of test steps.',
)
self._parser.add_argument(
'--batch_size',
type=int,
default=32,
required=False,
help='The batch size.',
)
self._parser.add_argument(
'--precision',
type=Precision,
default=[Precision.FLOAT32, Precision.FLOAT16],
nargs='+',
required=False,
help='Model precision. E.g. {}.'.format(' '.join(Precision.get_values())),
)
self._parser.add_argument(
'--model_action',
type=ModelAction,
default=[ModelAction.TRAIN],
nargs='+',
required=False,
help='Benchmark model process. E.g. {}.'.format(' '.join(ModelAction.get_values())),
)
self._parser.add_argument(
'--distributed_impl',
type=DistributedImpl,
default=None,
required=False,
help='Distributed implementations. E.g. {}'.format(' '.join(DistributedImpl.get_values())),
)
self._parser.add_argument(
'--distributed_backend',
type=DistributedBackend,
default=None,
required=False,
help='Distributed backends. E.g. {}'.format(' '.join(DistributedBackend.get_values())),
)
@abstractmethod
def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU."""
pass
@abstractmethod
def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info."""
pass
@abstractmethod
def _init_dataloader(self):
"""Initialize the distributed dataloader."""
pass
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
ret = super()._preprocess()
if not ret:
return False
self._init_distributed_setting()
self._generate_dataset()
self._init_dataloader()
return True
@abstractmethod
def _create_optimizer(self):
"""Create the optimzier instance used for training."""
pass
@abstractmethod
def _create_model(self, precision):
"""Construct the model for benchmarking.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
"""
pass
def __train(self, precision):
"""Launch the training benchmark.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
True if step_times list is not empty.
"""
self._create_model(precision)
self._create_optimizer()
# The unit of step time should be millisecond.
step_times = self._train_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for training is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
)
)
self.__process_model_result(ModelAction.TRAIN, precision, step_times)
return True
def __inference(self, precision):
"""Launch the inference benchmark.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
True if step_times list is not empty.
"""
self._create_model(precision)
# The unit of step time should be millisecond.
step_times = self._inference_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for inference is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
)
)
self.__process_model_result(ModelAction.INFERENCE, precision, step_times)
return True
@abstractmethod
def _train_step(self, precision):
"""Define the training process.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
The step-time list of every training step.
"""
pass
@abstractmethod
def _inference_step(self, precision):
"""Define the inference process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The latency list of every inference operation.
"""
pass
def _benchmark(self):
"""Implementation for benchmarking.
Return:
True if the benchmark runs successfully.
"""
precision_need_to_run = list()
for precision in self._args.precision:
# Check if the precision is supported or not.
if precision not in self._supported_precision:
logger.warning(
'Cannot run with the specified precision - model: {}, supported precision: {}, specified precision: {}'.
format(self._name, ' '.join([p.value for p in self._supported_precision]), precision)
)
else:
precision_need_to_run.append(precision)
if len(precision_need_to_run) == 0:
self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
return False
for precision in precision_need_to_run:
for model_action in self._args.model_action:
if model_action == ModelAction.TRAIN:
if not self.__train(precision):
self._result.set_return_code(ReturnCode.MODEL_TRAIN_FAILURE)
return False
elif model_action == ModelAction.INFERENCE:
if not self.__inference(precision):
self._result.set_return_code(ReturnCode.MODEL_INFERENCE_FAILURE)
return False
else:
logger.warning(
'Model action has no implementation yet - model: {}, model_action: {}'.format(
self._name, model_action
)
)
return True
def __process_model_result(self, model_action, precision, step_times):
"""Function to process raw results and save the summarized results.
Args:
model_action (ModelAction): train or inference.
precision (Precision): precision of model and input data, such as float32, float16.
step_times (list): The step time list of every training/inference step, unit is millisecond.
"""
metric = 'steptime_{}_{}'.format(model_action.value, precision.value)
self._result.add_raw_data(metric, step_times)
avg = sum(step_times) / len(step_times)
self._result.add_result(metric, avg)
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
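# For example, a 2 ms step with batch_size=32 yields 1000 / 2 * 32 = 16000 samples/sec.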
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
metric = 'throughput_{}_{}'.format(model_action.value, precision.value)
self._result.add_raw_data(metric, throughput)
avg = sum(throughput) / len(throughput)
self._result.add_result(metric, avg)
@abstractmethod
def _cal_params_size(self):
"""Calculate the parameters scale of the model.
Return:
The count of trainable parameters.
"""
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Interfaces that provide access to benchmarks."""
from typing import Dict
from superbench.common.utils import logger
from superbench.common.errors import DuplicateBenchmarkRegistrationError
from superbench.benchmarks import Platform, Framework, BenchmarkContext
from superbench.benchmarks.base import Benchmark
class BenchmarkRegistry:
"""Class that minatains all benchmarks.
Provide the following functions:
Register new benchmark.
Get the internal benchmark name.
Check the validity of benchmark parameters.
Get all configurable settings of benchmark.
Launch one benchmark and return the result.
"""
benchmarks: Dict[str, dict] = dict()
@classmethod
def register_benchmark(cls, name, class_def, parameters=None, platform=None):
"""Register new benchmark, key is the benchmark name.
Args:
name (str): internal name of benchmark.
class_def (Benchmark): class object of benchmark.
parameters (str): predefined parameters of benchmark.
platform (Platform): Platform types like CUDA, ROCM.
"""
if not name or not isinstance(name, str):
logger.log_and_raise(
TypeError,
'Name of registered benchmark is not string - benchmark: {}, type: {}'.format(name, type(name))
)
if not issubclass(class_def, Benchmark):
logger.log_and_raise(
TypeError,
'Registered class is not subclass of Benchmark - benchmark: {}, type: {}'.format(name, type(class_def))
)
if name not in cls.benchmarks:
cls.benchmarks[name] = dict()
if platform:
if platform not in Platform:
platform_list = list(map(str, Platform))
logger.log_and_raise(
TypeError, 'Unknown platform - benchmark: {}, supported platforms: {}, but got: {}'.format(
name, platform_list, platform
)
)
if platform not in cls.benchmarks[name]:
cls.benchmarks[name][platform] = (class_def, parameters)
else:
logger.log_and_raise(
DuplicateBenchmarkRegistrationError,
'Duplicate registration - benchmark: {}, platform: {}'.format(name, platform)
)
else:
# If the platform is not specified, the benchmark
# is registered for all platforms.
for p in Platform:
if p not in cls.benchmarks[name]:
cls.benchmarks[name][p] = (class_def, parameters)
else:
logger.log_and_raise(
DuplicateBenchmarkRegistrationError, 'Duplicate registration - benchmark: {}'.format(name)
)
@classmethod
def is_benchmark_context_valid(cls, benchmark_context):
"""Check wether the benchmark context is valid or not.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
ret (bool): return True if context is valid.
"""
if isinstance(benchmark_context, BenchmarkContext) and benchmark_context.name:
return True
else:
logger.error('Benchmark has invalid context')
return False
@classmethod
def __get_benchmark_name(cls, benchmark_context):
"""Return the internal benchmark name.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
benchmark_name (str): internal benchmark name, None means context is invalid.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = benchmark_context.name
framework = benchmark_context.framework
if framework != Framework.NONE:
benchmark_name = framework.value + '-' + benchmark_name
return benchmark_name
@classmethod
def check_parameters(cls, benchmark_context):
"""Check the validation of customized parameters.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
Return True if benchmark exists and context/parameters are valid.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return False
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
customized_parameters = benchmark_context.parameters
if benchmark_name:
(benchmark_class, params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
benchmark = benchmark_class(benchmark_name, customized_parameters)
benchmark.add_parser_arguments()
ret, args, unknown = benchmark.parse_args()
if ret and len(unknown) < 1:
return True
return False
@classmethod
def get_benchmark_configurable_settings(cls, benchmark_context):
"""Get all configurable settings of benchmark.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
All configurable settings in raw string, None means context is invalid or no benchmark is found.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
(benchmark_class, predefine_params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
benchmark = benchmark_class(benchmark_name)
benchmark.add_parser_arguments()
return benchmark.get_configurable_settings()
else:
return None
@classmethod
def launch_benchmark(cls, benchmark_context):
"""Select and Launch benchmark.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
benchmark (Benchmark): the benchmark instance containing all results;
None means the context is invalid or no benchmark is found.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = cls.__get_benchmark_name(benchmark_context)
benchmark = None
if benchmark_name:
platform = benchmark_context.platform
parameters = benchmark_context.parameters
(benchmark_class, predefine_params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
if predefine_params:
parameters = predefine_params + ' ' + parameters
benchmark = benchmark_class(benchmark_name, parameters)
# Run the benchmark; the return code is recorded in the result object.
benchmark.run()
return benchmark
@classmethod
def is_benchmark_registered(cls, benchmark_context):
"""Check wether the benchmark is registered or not.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
ret (bool): return True if context is valid and benchmark is registered.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return False
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
if cls.benchmarks.get(benchmark_name, {}).get(platform) is None:
return False
return True
@classmethod
def __select_benchmark(cls, name, platform):
"""Select benchmark by name and platform.
Args:
name (str): internal name of benchmark.
platform (Platform): Platform type of benchmark.
Return:
benchmark_class (Benchmark): class object of benchmark.
predefine_params (str): predefined parameters which are set when registering the benchmark.
"""
if name not in cls.benchmarks or platform not in cls.benchmarks[name]:
logger.warning('Benchmark has no implementation, name: {}, platform: {}'.format(name, platform))
return (None, None)
(benchmark_class, predefine_params) = cls.benchmarks[name][platform]
return (benchmark_class, predefine_params)
@classmethod
def clean_benchmarks(cls):
"""Clean up the benchmark registry."""
cls.benchmarks.clear()
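'''
# Illustrative end-to-end flow ('CpuCopyBenchmark' is a hypothetical class):
BenchmarkRegistry.register_benchmark('cpu-copy', CpuCopyBenchmark, platform=Platform.CPU)
context = BenchmarkContext('cpu-copy', Platform.CPU, parameters='--run_count=2')
if BenchmarkRegistry.check_parameters(context):
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        print(benchmark.serialized_result)
'''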
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified result of benchmarks."""
import json
from enum import Enum
from superbench.common.utils import logger
class BenchmarkResult():
"""Result class of all benchmarks.
Defines the unified result format.
"""
def __init__(self, name, type, return_code, run_count=0):
"""Constructor.
Args:
name (str): name of benchmark.
type (BenchmarkType): type of benchmark.
return_code (ReturnCode): return code of benchmark.
run_count (int): run count of the benchmark; results of all runs will be organized as arrays.
"""
self.__name = name
self.__type = type
self.__run_count = run_count
self.__return_code = return_code
self.__start_time = None
self.__end_time = None
self.__raw_data = dict()
self.__result = dict()
def __eq__(self, rhs):
"""Override equal function for deep comparison.
Args:
rhs (BenchmarkResult): instance to compare.
Return:
True if two instances have all the same values for all the same attributes.
"""
return self.__dict__ == rhs.__dict__
def add_raw_data(self, metric, value):
"""Add raw benchmark data into result.
Args:
metric (str): metric name which is the key.
value (str or list): raw benchmark data.
For e2e model benchmarks, its type is list.
For micro-benchmarks or docker-benchmarks, its type is string.
Return:
True if the raw data is added successfully.
"""
if not metric or not isinstance(metric, str):
logger.error(
'metric name of benchmark is not string, name: {}, metric type: {}'.format(self.__name, type(metric))
)
return False
if metric not in self.__raw_data:
self.__raw_data[metric] = list()
self.__raw_data[metric].append(value)
return True
def add_result(self, metric, value):
"""Add summarized data into result.
Args:
metric (str): metric name which is the key.
value (float): summarized data.
For e2e model benchmarks, the value is step-time or throughput.
For micro-benchmarks, the value is FLOPS, bandwidth, etc.
Return:
True if the result is added successfully.
"""
if not metric or not isinstance(metric, str):
logger.error(
'metric name of benchmark is not string, name: {}, metric type: {}'.format(self.__name, type(metric))
)
return False
if metric not in self.__result:
self.__result[metric] = list()
self.__result[metric].append(value)
return True
def set_timestamp(self, start, end):
"""Set the start and end timestamp of benchmarking.
Args:
start (datetime): start timestamp of benchmarking.
end (datetime): end timestamp of benchmarking.
"""
self.__start_time = start
self.__end_time = end
def set_benchmark_type(self, benchmark_type):
"""Set the type of benchmark.
Args:
benchmark_type (BenchmarkType): type of benchmark, such as BenchmarkType.MODEL, BenchmarkType.MICRO.
"""
self.__type = benchmark_type
def set_return_code(self, return_code):
"""Set the return code.
Args:
return_code (ReturnCode): return code defined in superbench.benchmarks.ReturnCode.
"""
self.__return_code = return_code
def to_string(self):
"""Serialize the BenchmarkResult object to string.
Return:
The serialized string of BenchmarkResult object.
"""
formatted_obj = dict()
for key in self.__dict__:
# The name of internal member is like '_BenchmarkResult__name'.
# For the result object return to caller, just keep 'name'.
formatted_key = key.split('__')[1]
if isinstance(self.__dict__[key], Enum):
formatted_obj[formatted_key] = self.__dict__[key].value
else:
formatted_obj[formatted_key] = self.__dict__[key]
return json.dumps(formatted_obj)
@property
def name(self):
"""Decoration function to access __name."""
return self.__name
@property
def type(self):
"""Decoration function to access __type."""
return self.__type
@property
def run_count(self):
"""Decoration function to access __run_count."""
return self.__run_count
@property
def return_code(self):
"""Decoration function to access __return_code."""
return self.__return_code
@property
def start_time(self):
"""Decoration function to access __start_time."""
return self.__start_time
@property
def end_time(self):
"""Decoration function to access __end_time."""
return self.__end_time
@property
def raw_data(self):
"""Decoration function to access __raw_data."""
return self.__raw_data
@property
def result(self):
"""Decoration function to access __result."""
return self.__result
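'''
# Illustrative only: assembling and serializing a result by hand
# (BenchmarkType and ReturnCode imports are assumed here).
result = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
result.add_raw_data('metric1', 'raw log 1')
result.add_result('metric1', 300)
print(result.to_string())
'''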
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified context of benchmarks."""
import enum
class Enum(enum.Enum):
"""Customized Enum class."""
@classmethod
def get_values(cls):
"""Return the value list."""
values = [item.value for item in cls]
return values
class ReturnCode(Enum):
"""The Enum class representing benchmark status."""
# Common return codes.
SUCCESS = 0
INVALID_ARGUMENT = 1
INVALID_BENCHMARK_TYPE = 2
INVALID_BENCHMARK_RESULT = 3
# Return codes related with model benchmarks.
NO_SUPPORTED_PRECISION = 10
MODEL_TRAIN_FAILURE = 11
MODEL_INFERENCE_FAILURE = 12
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exception types for SuperBench errors."""
class DuplicateBenchmarkRegistrationError(Exception):
"""An error is raised for duplicate benchmark registration."""
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exposes the interface of SuperBench common utilities."""
from .logging import logger
__all__ = ['logger']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench loggin module."""
import socket
import logging
import sys
import io
class LoggerAdapter(logging.LoggerAdapter):
"""LoggerAdapter class which add customized function for log error and raise exception."""
def log_and_raise(self, exception, msg, *args):
"""Log error and raise exception.
Args:
exception (BaseException): Exception class.
msg (str): logging message.
args (dict): arguments dict for message.
"""
self.error(msg, *args)
raise exception(msg % args)
class Logger:
"""Logger class which creates logger instance."""
@staticmethod
def create_logger(name, level=logging.INFO, stream=sys.stdout):
"""Create logger instance with customized format.
Args:
name (str): project name.
level (int): logging level, default is INFO.
stream (TextIOBase): stream object, such as stdout or file object,
default is sys.stdout.
Return:
logger with the specified name, level and stream.
"""
is_level_valid = True
if level not in logging._levelToName.keys():
invalid_level = level
level = logging.INFO
is_level_valid = False
is_stream_valid = True
if not isinstance(stream, io.IOBase):
invalid_stream = stream
stream = sys.stdout
is_stream_valid = False
formatter = logging.Formatter(
'%(asctime)s - %(hostname)s - '
'%(filename)s:%(lineno)d - '
'%(levelname)s: %(message)s'
)
handler = logging.StreamHandler(stream=stream)
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(level)
logger.addHandler(handler)
logger = LoggerAdapter(logger, extra={'hostname': socket.gethostname()})
if not is_level_valid:
logger.error(
'Log level is invalid, replaced it with logging.INFO - level: {}, expected: {}'.format(
invalid_level, ' '.join(str(x) for x in logging._levelToName.keys())
)
)
if not is_stream_valid:
logger.error('Stream is invalid, replaced it with sys.stdout - stream type: {}'.format(type(invalid_stream)))
return logger
logger = Logger.create_logger('SuperBench', level=logging.INFO)
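'''
# Illustrative only: typical usage of the module-level logger.
logger.info('benchmark started - name: {}'.format('demo'))
# log_and_raise logs the message and then raises the given exception type.
logger.log_and_raise(ValueError, 'invalid value - got: %s', 'foo')
'''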
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkRegistry module."""
from superbench.benchmarks import Platform, Framework, Precision, \
BenchmarkContext, BenchmarkRegistry, BenchmarkType, ReturnCode
from superbench.benchmarks.model_benchmarks import ModelBenchmark
class FakeModelBenchmark(ModelBenchmark):
"""Fake benchmark inherit from ModelBenchmark."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name: benchmark name.
parameters: benchmark parameters.
"""
super().__init__(name, parameters)
self._supported_precision = [Precision.FLOAT32, Precision.FLOAT16]
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--hidden_size',
type=int,
default=1024,
required=False,
help='Hidden size',
)
self._parser.add_argument(
'--seq_len',
type=int,
default=512,
required=False,
help='Sequence length',
)
def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU."""
pass
def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info."""
pass
def _init_dataloader(self):
"""Initialize the distributed dataloader."""
pass
def _create_optimizer(self):
"""Create the optimzier instance used for training."""
pass
def _create_model(self, precision):
"""Construct the model for benchmarking."""
pass
def _train_step(self, precision):
"""Define the training process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The step-time list of every training step.
"""
duration = []
for i in range(self._args.num_steps):
duration.append(2)
return duration
def _inference_step(self, precision):
"""Define the inference process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The latency list of every inference operation.
"""
duration = []
for i in range(self._args.num_steps):
duration.append(4)
return duration
def _cal_params_size(self):
"""Calculate the parameters scale of the model.
Return:
The count of trainable parameters.
"""
return 200
def create_benchmark(params='--num_steps=8'):
"""Register and create benchmark."""
# Register the FakeModelBenchmark benchmark.
BenchmarkRegistry.register_benchmark(
'pytorch-fake-model',
FakeModelBenchmark,
parameters='--hidden_size=2',
platform=Platform.CUDA,
)
context = BenchmarkContext('fake-model', Platform.CUDA, parameters=params, framework=Framework.PYTORCH)
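# Access the private (name-mangled) registry helpers directly for testing.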
name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(context)
assert (name)
(benchmark_class, predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(name, context.platform)
assert (benchmark_class)
BenchmarkRegistry.clean_benchmarks()
return benchmark_class(name, predefine_params + ' ' + context.parameters)
def test_arguments_related_interfaces():
"""Test arguments related interfaces.
Benchmark.add_parser_arguments(),
Benchmark.parse_args(),
Benchmark.get_configurable_settings()
"""
# Positive case for parse_args().
benchmark = create_benchmark('--num_steps=9')
benchmark.add_parser_arguments()
(ret, args, unknown) = benchmark.parse_args()
assert (ret and args.num_steps == 9)
# Negative case for parse_args() - invalid precision.
benchmark = create_benchmark('--num_steps=8 --precision=fp32')
benchmark.add_parser_arguments()
(ret, args, unknown) = benchmark.parse_args()
assert (ret is False)
# Test get_configurable_settings().
settings = benchmark.get_configurable_settings()
expected_settings = (
"""optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--num_warmup int The number of warmup steps.
--num_steps int The number of test steps.
--batch_size int The batch size.
--precision Precision [Precision ...]
Model precision. E.g. float16 float32 float64 bfloat16
uint8 int8 int16 int32 int64.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--distributed_impl DistributedImpl
Distributed implementations. E.g. ddp mirrored
multiworkermirrored parameterserver horovod
--distributed_backend DistributedBackend
Distributed backends. E.g. nccl mpi gloo
--hidden_size int Hidden size
--seq_len int Sequence length"""
)
assert (settings == expected_settings)
def test_preprocess():
"""Test interface Benchmark._preprocess()."""
# Positive case for _preprocess().
benchmark = create_benchmark('--num_steps=8')
assert (benchmark._preprocess())
assert (benchmark.return_code == ReturnCode.SUCCESS)
settings = benchmark.get_configurable_settings()
expected_settings = (
"""optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--num_warmup int The number of warmup steps.
--num_steps int The number of test steps.
--batch_size int The batch size.
--precision Precision [Precision ...]
Model precision. E.g. float16 float32 float64 bfloat16
uint8 int8 int16 int32 int64.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--distributed_impl DistributedImpl
Distributed implementations. E.g. ddp mirrored
multiworkermirrored parameterserver horovod
--distributed_backend DistributedBackend
Distributed backends. E.g. nccl mpi gloo
--hidden_size int Hidden size
--seq_len int Sequence length"""
)
print(settings)
assert (settings == expected_settings)
# Negative case for _preprocess() - invalid precision.
benchmark = create_benchmark('--num_steps=8 --precision=fp32')
assert (benchmark._preprocess() is False)
assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
# Negative case for _preprocess() - invalid benchmark type.
benchmark = create_benchmark('--num_steps=8 --precision=float32')
benchmark._benchmark_type = Platform.CUDA
assert (benchmark._preprocess() is False)
assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_TYPE)
def test_train():
"""Test interface Benchmark.__train()."""
benchmark = create_benchmark()
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {"steptime_train_float32": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__train(Precision.FLOAT32))
assert (benchmark.serialized_result == expected_result)
# Step time list is empty (simulate training failure).
benchmark = create_benchmark('--num_steps=0')
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__train(Precision.FLOAT32) is False)
assert (benchmark.serialized_result == expected_result)
def test_inference():
"""Test interface Benchmark.__inference()."""
benchmark = create_benchmark()
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {"steptime_inference_float16": [[4, 4, 4, 4, 4, 4, 4, 4]], '
'"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"steptime_inference_float16": [4.0], "throughput_inference_float16": [8000.0]}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16))
assert (benchmark.serialized_result == expected_result)
# Step time list is empty (simulate inference failure).
benchmark = create_benchmark('--num_steps=0')
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16) is False)
assert (benchmark.serialized_result == expected_result)
def test_benchmark():
"""Test interface Benchmark._benchmark()."""
# Positive case for _benchmark().
benchmark = create_benchmark()
benchmark._preprocess()
assert (benchmark._benchmark())
assert (benchmark.name == 'pytorch-fake-model')
assert (benchmark.type == BenchmarkType.MODEL)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
expected_raw_data = {
'steptime_train_float32': [[2, 2, 2, 2, 2, 2, 2, 2]],
'throughput_train_float32': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]],
'steptime_train_float16': [[2, 2, 2, 2, 2, 2, 2, 2]],
'throughput_train_float16': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]
}
assert (benchmark.raw_data == expected_raw_data)
expected_result = {
'steptime_train_float32': [2.0],
'throughput_train_float32': [16000.0],
'steptime_train_float16': [2.0],
'throughput_train_float16': [16000.0]
}
assert (benchmark.result == expected_result)
expected_serialized_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'"end_time": null, "raw_data": {"steptime_train_float32": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"steptime_train_float16": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float16": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
'"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}}'
)
assert (benchmark.serialized_result == expected_serialized_result)
# Negative case for _benchmark() - no supported precision found.
benchmark = create_benchmark('--precision=int16')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.NO_SUPPORTED_PRECISION)
# Negative case for _benchmark() - model train failure, step time list is empty.
benchmark = create_benchmark('--num_steps=0')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.MODEL_TRAIN_FAILURE)
# Negative case for _benchmark() - model inference failure, step time list is empty.
benchmark = create_benchmark('--model_action=inference --num_steps=0')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.MODEL_INFERENCE_FAILURE)
def test_check_result_format():
"""Test interface Benchmark.__check_result_format()."""
# Positive case for __check_result_format().
benchmark = create_benchmark()
benchmark._preprocess()
assert (benchmark._benchmark())
assert (benchmark._Benchmark__check_result_type())
assert (benchmark._Benchmark__check_summarized_result())
assert (benchmark._Benchmark__check_raw_data())
# Negative case for __check_result_format() - change List[int] to List[str].
benchmark._result._BenchmarkResult__result = {'metric1': ['2.0']}
assert (benchmark._Benchmark__check_summarized_result() is False)
# Negative case for __check_raw_data() - change List[List[int]] to List[List[str]].
benchmark._result._BenchmarkResult__raw_data = {'metric1': [['2.0']]}
assert (benchmark._Benchmark__check_raw_data() is False)
# Negative case for __check_raw_data() - invalid benchmark result.
assert (benchmark._Benchmark__check_result_format() is False)
assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkResult module."""
from superbench.benchmarks import BenchmarkContext, Platform, Framework
def test_benchmark_context():
"""Test BenchmarkContext class."""
context = BenchmarkContext('pytorch-bert-large', Platform.CUDA, 'batch_size=8', framework=Framework.PYTORCH)
assert (context.name == 'pytorch-bert-large')
assert (context.platform == Platform.CUDA)
assert (context.parameters == 'batch_size=8')
assert (context.framework == Framework.PYTORCH)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkRegistry module."""
import re
from superbench.benchmarks import Platform, Framework, BenchmarkType, BenchmarkContext, BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmark
class AccumulationBenchmark(MicroBenchmark):
"""Benchmark that do accumulation from lower_bound to upper_bound."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name: benchmark name.
parameters: benchmark parameters.
"""
super().__init__(name, parameters)
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--lower_bound',
type=int,
default=0,
required=False,
help='The lower bound for accumulation.',
)
self._parser.add_argument(
'--upper_bound',
type=int,
default=2,
required=False,
help='The upper bound for accumulation.',
)
def _benchmark(self):
"""Implementation for benchmarking."""
raw_data = []
result = 0
for i in range(self._args.lower_bound, self._args.upper_bound):
result += i
raw_data.append(str(result))
metric = 'accumulation_result'
self._result.add_raw_data(metric, ','.join(raw_data))
self._result.add_result(metric, result)
return True
def test_register_benchmark():
"""Test interface BenchmarkRegistry.register_benchmark()."""
# Register the benchmark for all platforms when no platform is specified.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
for platform in Platform:
context = BenchmarkContext('accumulation', platform)
assert (BenchmarkRegistry.is_benchmark_registered(context))
BenchmarkRegistry.clean_benchmarks()
# Register the benchmark only for the CUDA platform when platform=Platform.CUDA is specified.
BenchmarkRegistry.register_benchmark('accumulation-cuda', AccumulationBenchmark, platform=Platform.CUDA)
context = BenchmarkContext('accumulation-cuda', Platform.CUDA)
assert (BenchmarkRegistry.is_benchmark_registered(context))
context = BenchmarkContext('accumulation-cuda', Platform.ROCM)
assert (BenchmarkRegistry.is_benchmark_registered(context) is False)
BenchmarkRegistry.clean_benchmarks()
def test_is_benchmark_context_valid():
"""Test interface BenchmarkRegistry.is_benchmark_context_valid()."""
# Positive case.
context = BenchmarkContext('accumulation', Platform.CPU)
assert (BenchmarkRegistry.is_benchmark_context_valid(context))
# Negative case.
context = 'context'
assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
context = None
assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
def test_get_benchmark_name():
"""Test interface BenchmarkRegistry.get_benchmark_name()."""
# Register benchmarks for testing.
benchmark_names = ['accumulation', 'pytorch-accumulation', 'tf1-accumulation', 'onnx-accumulation']
for name in benchmark_names:
BenchmarkRegistry.register_benchmark(name, AccumulationBenchmark)
# Test benchmark name for different Frameworks.
benchmark_frameworks = [Framework.NONE, Framework.PYTORCH, Framework.TENSORFLOW1, Framework.ONNX]
for i in range(len(benchmark_names)):
context = BenchmarkContext('accumulation', Platform.CPU, framework=benchmark_frameworks[i])
name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(context)
assert (name == benchmark_names[i])
BenchmarkRegistry.clean_benchmarks()
def test_check_parameters():
"""Test interface BenchmarkRegistry.check_parameters()."""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
# Positive case.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1')
assert (BenchmarkRegistry.check_parameters(context))
# Negative case.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower=1')
assert (BenchmarkRegistry.check_parameters(context) is False)
BenchmarkRegistry.clean_benchmarks()
def test_get_benchmark_configurable_settings():
"""Test BenchmarkRegistry interface.
BenchmarkRegistry.get_benchmark_configurable_settings().
"""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
context = BenchmarkContext('accumulation', Platform.CPU)
settings = BenchmarkRegistry.get_benchmark_configurable_settings(context)
expected = """optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--lower_bound int The lower bound for accumulation.
--upper_bound int The upper bound for accumulation."""
assert (settings == expected)
BenchmarkRegistry.clean_benchmarks()
def test_launch_benchmark():
"""Test interface BenchmarkRegistry.launch_benchmark()."""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark(
'accumulation', AccumulationBenchmark, parameters='--upper_bound=5', platform=Platform.CPU
)
# Launch benchmark.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1')
if BenchmarkRegistry.check_parameters(context):
benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (benchmark.name == 'accumulation')
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (benchmark.raw_data == {'accumulation_result': ['1,3,6,10']})
assert (benchmark.result == {'accumulation_result': [10]})
# Replace the timestamp with null.
result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
expected = (
'{"name": "accumulation", "type": "micro", "run_count": 1, '
'"return_code": 0, "start_time": null, "end_time": null, '
'"raw_data": {"accumulation_result": ["1,3,6,10"]}, '
'"result": {"accumulation_result": [10]}}'
)
assert (result == expected)
# Launch benchmark with overridden parameters.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1 --upper_bound=4')
if BenchmarkRegistry.check_parameters(context):
benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (benchmark.name == 'accumulation')
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (benchmark.raw_data == {'accumulation_result': ['1,3,6']})
assert (benchmark.result == {'accumulation_result': [6]})
# Replace the timestamp with null.
result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
expected = (
'{"name": "accumulation", "type": "micro", "run_count": 1, '
'"return_code": 0, "start_time": null, "end_time": null, '
'"raw_data": {"accumulation_result": ["1,3,6"]}, '
'"result": {"accumulation_result": [6]}}'
)
assert (result == expected)
# Negative case - the benchmark is not registered under the pytorch framework.
context = BenchmarkContext(
'accumulation', Platform.CPU, parameters='--lower_bound=1 --upper_bound=4', framework=Framework.PYTORCH
)
assert (BenchmarkRegistry.check_parameters(context) is False)
BenchmarkRegistry.clean_benchmarks()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkResult module."""
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult
def test_add_raw_data():
"""Test interface BenchmarkResult.add_raw_data()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.add_raw_data('metric1', 'raw log 1')
result.add_raw_data('metric1', 'raw log 2')
assert (result.raw_data['metric1'][0] == 'raw log 1')
assert (result.raw_data['metric1'][1] == 'raw log 2')
assert (result.type == BenchmarkType.MICRO.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
result = BenchmarkResult('model', BenchmarkType.MODEL.value, ReturnCode.SUCCESS.value)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
assert (result.raw_data['metric1'][0] == [1, 2, 3])
assert (result.raw_data['metric1'][1] == [4, 5, 6])
assert (result.type == BenchmarkType.MODEL.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
def test_add_result():
"""Test interface BenchmarkResult.add_result()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.add_result('metric1', 300)
result.add_result('metric1', 200)
assert (result.result['metric1'][0] == 300)
assert (result.result['metric1'][1] == 200)
def test_set_timestamp():
"""Test interface BenchmarkResult.set_timestamp()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
start_time = '2021-02-03 16:59:49'
end_time = '2021-02-03 17:00:08'
result.set_timestamp(start_time, end_time)
assert (result.start_time == start_time)
assert (result.end_time == end_time)
def test_set_benchmark_type():
"""Test interface BenchmarkResult.set_benchmark_type()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.set_benchmark_type(BenchmarkType.MICRO.value)
assert (result.type == BenchmarkType.MICRO.value)
def test_set_return_code():
"""Test interface BenchmarkResult.set_return_code()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
result.set_return_code(ReturnCode.INVALID_ARGUMENT.value)
assert (result.return_code == ReturnCode.INVALID_ARGUMENT.value)
result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT.value)
assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT.value)
def test_serialize_deserialize():
"""Test serialization/deserialization and compare the results."""
# Result with one metric.
result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value, run_count=2)
result.add_result('metric1', 300)
result.add_result('metric1', 200)
result.add_result('metric2', 100)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
result.add_raw_data('metric1', [7, 8, 9])
start_time = '2021-02-03 16:59:49'
end_time = '2021-02-03 17:00:08'
result.set_timestamp(start_time, end_time)
result.set_benchmark_type(BenchmarkType.MICRO.value)
expected = (
'{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
'"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
'"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
'"result": {"metric1": [300, 200], "metric2": [100]}}'
)
assert (result.to_string() == expected)