Commit 4c87a3e4 authored by guoshzhao, committed by GitHub

Benchmarks: Initialization - Add base class, registry, and result (#1)

* benchmarks init.

Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>

parent d32b96eb
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exposes interfaces of benchmarks used by SuperBench executor."""
from .return_code import ReturnCode
from .context import Platform, Framework, Precision, ModelAction, BenchmarkType, BenchmarkContext
from .registry import BenchmarkRegistry
__all__ = [
'ReturnCode', 'Platform', 'Framework', 'BenchmarkType', 'Precision', 'ModelAction', 'BenchmarkContext',
'BenchmarkRegistry'
]
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the base class."""
import argparse
import numbers
from datetime import datetime
from abc import ABC, abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult
class Benchmark(ABC):
"""The base class of all benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
self._name = name
self._argv = list(filter(None, parameters.split(' ')))
self._benchmark_type = None
self._parser = argparse.ArgumentParser(
add_help=False,
usage=argparse.SUPPRESS,
allow_abbrev=False,
formatter_class=argparse.MetavarTypeHelpFormatter
)
self._args = None
self._curr_run_index = 0
self._result = None
def add_parser_arguments(self):
"""Add the specified arguments."""
self._parser.add_argument(
'--run_count',
type=int,
default=1,
required=False,
help='The run count of benchmark.',
)
self._parser.add_argument(
'--duration',
type=int,
default=0,
required=False,
help='The elapsed time of benchmark in seconds.',
)
def get_configurable_settings(self):
"""Get all the configurable settings.
Return:
All configurable settings in raw string.
"""
return self._parser.format_help().strip()
def parse_args(self):
"""Parse the arguments.
Return:
ret (bool): whether parsing succeeded or not.
args (argparse.Namespace): parsed arguments.
unknown (list): unknown arguments.
"""
try:
args, unknown = self._parser.parse_known_args(self._argv)
except BaseException as e:
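# Note: argparse raises SystemExit (a BaseException, not an Exception)
# when parsing fails, so the broad BaseException catch here is intentional.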
logger.error('Invalid argument - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False, None, None
if len(unknown) > 0:
logger.warning(
'Benchmark has unknown arguments - benchmark: {}, unknown arguments: {}'.format(
self._name, ' '.join(unknown)
)
)
return True, args, unknown
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
self.add_parser_arguments()
ret, self._args, unknown = self.parse_args()
if not ret:
self._result = BenchmarkResult(self._name, self._benchmark_type, ReturnCode.INVALID_ARGUMENT)
return False
self._result = BenchmarkResult(
self._name, self._benchmark_type, ReturnCode.SUCCESS, run_count=self._args.run_count
)
if not isinstance(self._benchmark_type, BenchmarkType):
logger.error(
'Invalid benchmark type - benchmark: {}, type: {}'.format(self._name, type(self._benchmark_type))
)
self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
return False
return True
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def run(self):
"""Function to launch the benchmarking.
Return:
True if the benchmark runs successfully.
"""
if not self._preprocess():
return False
self._start_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
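# Bind the loop variable to the instance so that _benchmark() and its
# helpers can reference the current run index when logging results.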
for self._curr_run_index in range(self._args.run_count):
if not self._benchmark():
return False
self._end_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
self._result.set_timestamp(self._start_time, self._end_time)
if not self.__check_result_format():
return False
return True
def __check_result_format(self):
"""Check the validation of result object.
Return:
True if the result is valid.
"""
if (not self.__check_result_type()) or (not self.__check_summarized_result()) or (not self.__check_raw_data()):
self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
return False
return True
def __check_result_type(self):
"""Check the type of result object.
Return:
True if the result is instance of BenchmarkResult.
"""
if not isinstance(self._result, BenchmarkResult):
logger.error(
'Invalid benchmark result type - benchmark: {}, type: {}'.format(self._name, type(self._result))
)
return False
return True
def __check_summarized_result(self):
"""Check the validation of summary result.
Return:
True if the summary result is instance of List[Number].
"""
for metric in self._result.result:
is_valid = isinstance(self._result.result[metric], list)
if is_valid:
for value in self._result.result[metric]:
if not isinstance(value, numbers.Number):
is_valid = False
break
if not is_valid:
logger.error(
'Invalid summarized result - benchmark: {}, metric name: {}, expect: List[Number], got: {}.'.format(
self._name, metric, type(self._result.result[metric])
)
)
return False
return True
def __check_raw_data(self):
"""Check the validation of raw data.
Return:
True if the raw data is:
instance of List[List[Number]] for BenchmarkType.MODEL, and BenchmarkType.DOCKER.
instance of List[str] for BenchmarkType.MICRO.
"""
for metric in self._result.raw_data:
is_valid = isinstance(self._result.raw_data[metric], list)
if is_valid:
for run in self._result.raw_data[metric]:
if self._benchmark_type in [BenchmarkType.MODEL, BenchmarkType.DOCKER]:
if not isinstance(run, list):
is_valid = False
break
for value in run:
if not isinstance(value, numbers.Number):
is_valid = False
break
elif self._benchmark_type in [BenchmarkType.MICRO]:
is_valid = isinstance(run, str)
if not is_valid:
logger.error(
'Invalid raw data - benchmark: {}, metric name: {}, expect: {}, got: {}.'.format(
self._name, metric,
'List[str]' if self._benchmark_type == BenchmarkType.MICRO else 'List[List[Number]]',
type(self._result.raw_data[metric])
)
)
return False
return True
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
@property
def name(self):
"""Decoration function to access benchmark name."""
return self._result.name
@property
def type(self):
"""Decoration function to access benchmark type."""
return self._result.type
@property
def run_count(self):
"""Decoration function to access benchmark run_count."""
return self._result.run_count
@property
def return_code(self):
"""Decoration function to access benchmark return_code."""
return self._result.return_code
@property
def start_time(self):
"""Decoration function to access benchmark start_time."""
return self._result.start_time
@property
def end_time(self):
"""Decoration function to access benchmark end_time."""
return self._result.end_time
@property
def raw_data(self):
"""Decoration function to access benchmark raw_data."""
return self._result.raw_data
@property
def result(self):
"""Decoration function to access benchmark result."""
return self._result.result
@property
def serialized_result(self):
"""Decoration function to access benchmark result."""
return self._result.to_string()
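'''
# A minimal usage sketch (illustrative only, not part of this commit).
# 'DemoBenchmark' is a hypothetical subclass that fills in the abstract
# _benchmark() hook and records one metric per run.
class DemoBenchmark(Benchmark):
    def __init__(self, name, parameters=''):
        super().__init__(name, parameters)
        self._benchmark_type = BenchmarkType.MICRO

    def _benchmark(self):
        # Raw data for micro-benchmarks is expected to be a string.
        self._result.add_raw_data('demo_metric', 'raw output of one run')
        self._result.add_result('demo_metric', 42)
        return True

benchmark = DemoBenchmark('demo', parameters='--run_count=2')
if benchmark.run():
    print(benchmark.serialized_result)
'''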
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified context of benchmarks."""
import enum
class Enum(enum.Enum):
"""Customized Enum class."""
@classmethod
def get_values(cls):
"""Return the value list."""
values = [item.value for item in cls]
return values
def __str__(self):
"""Value as the string."""
return self.value
class Platform(Enum):
"""The Enum class representing different platforms."""
CPU = 'CPU'
CUDA = 'CUDA'
ROCM = 'ROCm'
class Framework(Enum):
"""The Enum class representing different frameworks."""
ONNX = 'onnx'
PYTORCH = 'pytorch'
TENSORFLOW1 = 'tf1'
TENSORFLOW2 = 'tf2'
NONE = 'none'
class BenchmarkType(Enum):
"""The Enum class representing different types of benchmarks."""
MODEL = 'model'
MICRO = 'micro'
DOCKER = 'docker'
class Precision(Enum):
"""The Enum class representing different data precisions."""
FLOAT16 = 'float16'
FLOAT32 = 'float32'
FLOAT64 = 'float64'
BFLOAT16 = 'bfloat16'
UINT8 = 'uint8'
INT8 = 'int8'
INT16 = 'int16'
INT32 = 'int32'
INT64 = 'int64'
class ModelAction(Enum):
"""The Enum class representing different model process."""
TRAIN = 'train'
INFERENCE = 'inference'
class BenchmarkContext():
"""Context class of all benchmarks.
Containing all information to launch one benchmark.
"""
def __init__(self, name, platform, parameters='', framework=Framework.NONE):
"""Constructor.
Args:
name (str): name of benchmark in config file.
platform (Platform): Platform types like CUDA, ROCM.
parameters (str): predefined parameters of benchmark.
framework (Framework): Framework types like ONNX, PYTORCH.
"""
self.__name = name
self.__platform = platform
self.__parameters = parameters
self.__framework = framework
@property
def name(self):
"""Decoration function to access __name."""
return self.__name
@property
def platform(self):
"""Decoration function to access __platform."""
return self.__platform
@property
def parameters(self):
"""Decoration function to access __parameters."""
return self.__parameters
@property
def framework(self):
"""Decoration function to access __framework."""
return self.__framework
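'''
# Illustrative only: constructing a context for a hypothetical benchmark.
context = BenchmarkContext(
    'bert-large', Platform.CUDA, parameters='--batch_size=8', framework=Framework.PYTORCH
)
assert context.framework is Framework.PYTORCH
'''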
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the benchmarks packaged in docker."""
from .docker_base import DockerBenchmark
__all__ = ['DockerBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the docker-benchmark base class."""
from abc import abstractmethod
from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark
class DockerBenchmark(Benchmark):
"""The base class of benchmarks packaged in docker container."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.DOCKER
# Command lines to launch the docker image and run the benchmarks inside docker.
self.__commands = list()
'''
# If new arguments need to be added, super().add_parser_arguments() must be called.
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
'''
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
return super()._preprocess()
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def _process_docker_result(self, output):
"""Function to process raw results and save the summarized results.
Args:
output (str): raw output string of the docker benchmark.
"""
# TODO: will implement it when add real benchmarks in the future.
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the micro-benchmarks."""
from .micro_base import MicroBenchmark
__all__ = ['MicroBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the micro-benchmark base class."""
from abc import abstractmethod
from superbench.benchmarks import BenchmarkType
from superbench.benchmarks.base import Benchmark
class MicroBenchmark(Benchmark):
"""The base class of micro-benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.MICRO
# Command lines to launch the micro-benchmarks.
self.__commands = list()
'''
# If new arguments need to be added, super().add_parser_arguments() must be called.
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
'''
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
return super()._preprocess()
@abstractmethod
def _benchmark(self):
"""Implementation for benchmarking."""
pass
def _process_micro_result(self, output):
"""Function to process raw results and save the summarized results.
Args:
output (str): raw output string of the micro-benchmark.
"""
# TODO: will implement it when add real benchmarks in the future.
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""A module containing all the e2e model related benchmarks."""
from .model_base import ModelBenchmark
__all__ = ['ModelBenchmark']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the model-benchmark base class."""
from abc import abstractmethod
from superbench.common.utils import logger
from superbench.benchmarks import Precision, ModelAction, BenchmarkType, ReturnCode
from superbench.benchmarks.base import Benchmark
from superbench.benchmarks.context import Enum
class DistributedImpl(Enum):
"""The Enum class representing different distributed implementations."""
DDP = 'ddp'
MIRRORED = 'mirrored'
MW_MIRRORED = 'multiworkermirrored'
PS = 'parameterserver'
HOROVOD = 'horovod'
class DistributedBackend(Enum):
"""The Enum class representing different distributed backends."""
NCCL = 'nccl'
MPI = 'mpi'
GLOO = 'gloo'
class ModelBenchmark(Benchmark):
"""The base class of E2E model benchmarks."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._benchmark_type = BenchmarkType.MODEL
self._world_size = None
self._dataset = None
self._dataloader = None
self._model = None
self._optimizer = None
self._loss_fn = None
self._target = None
self._supported_precision = []
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--num_warmup',
type=int,
default=64,
required=False,
help='The number of warmup steps.',
)
self._parser.add_argument(
'--num_steps',
type=int,
default=2048,
required=False,
help='The number of test steps.',
)
self._parser.add_argument(
'--batch_size',
type=int,
default=32,
required=False,
help='The batch size.',
)
self._parser.add_argument(
'--precision',
type=Precision,
default=[Precision.FLOAT32, Precision.FLOAT16],
nargs='+',
required=False,
help='Model precision. E.g. {}.'.format(' '.join(Precision.get_values())),
)
self._parser.add_argument(
'--model_action',
type=ModelAction,
default=[ModelAction.TRAIN],
nargs='+',
required=False,
help='Benchmark model process. E.g. {}.'.format(' '.join(ModelAction.get_values())),
)
self._parser.add_argument(
'--distributed_impl',
type=DistributedImpl,
default=None,
required=False,
help='Distributed implementations. E.g. {}'.format(' '.join(DistributedImpl.get_values())),
)
self._parser.add_argument(
'--distributed_backend',
type=DistributedBackend,
default=None,
required=False,
help='Distributed backends. E.g. {}'.format(' '.join(DistributedBackend.get_values())),
)
@abstractmethod
def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU."""
pass
@abstractmethod
def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info."""
pass
@abstractmethod
def _init_dataloader(self):
"""Initialize the distributed dataloader."""
pass
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeeds.
"""
ret = super()._preprocess()
if not ret:
return False
self._init_distributed_setting()
self._generate_dataset()
self._init_dataloader()
return True
@abstractmethod
def _create_optimizer(self):
"""Create the optimzier instance used for training."""
pass
@abstractmethod
def _create_model(self, precision):
"""Construct the model for benchmarking.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
"""
pass
def __train(self, precision):
"""Launch the training benchmark.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
True if step_times list is not empty.
"""
self._create_model(precision)
self._create_optimizer()
# The unit of step time should be millisecond.
step_times = self._train_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for training is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average train time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
)
)
self.__process_model_result(ModelAction.TRAIN, precision, step_times)
return True
def __inference(self, precision):
"""Launch the inference benchmark.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
True if step_times list is not empty.
"""
self._create_model(precision)
# The unit of step time should be millisecond.
step_times = self._inference_step(precision)
if len(step_times) == 0:
logger.error(
'Step time list for inference is empty - round: {}, model: {}, precision: {}.'.format(
self._curr_run_index, self._name, precision
)
)
return False
average_time = sum(step_times) / len(step_times)
logger.info(
'Average inference time - round: {}, model: {}, precision: {}, step time: {:.6f} ms.'.format(
self._curr_run_index, self._name, precision, average_time
)
)
self.__process_model_result(ModelAction.INFERENCE, precision, step_times)
return True
@abstractmethod
def _train_step(self, precision):
"""Define the training process.
Args:
precision (Precision): precision of model and input data, such as float32, float16.
Return:
The step-time list of every training step.
"""
pass
@abstractmethod
def _inference_step(self, precision):
"""Define the inference process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The latency list of every inference operation.
"""
pass
def _benchmark(self):
"""Implementation for benchmarking.
Return:
True if the benchmark runs successfully.
"""
precision_need_to_run = list()
for precision in self._args.precision:
# Check if the precision is supported or not.
if precision not in self._supported_precision:
logger.warning(
'Cannot run with the specified precision - model: {}, supported precision: {}, specified precision: {}'.
format(self._name, ' '.join([p.value for p in self._supported_precision]), precision)
)
else:
precision_need_to_run.append(precision)
if len(precision_need_to_run) == 0:
self._result.set_return_code(ReturnCode.NO_SUPPORTED_PRECISION)
return False
for precision in precision_need_to_run:
for model_action in self._args.model_action:
if model_action == ModelAction.TRAIN:
if not self.__train(precision):
self._result.set_return_code(ReturnCode.MODEL_TRAIN_FAILURE)
return False
elif model_action == ModelAction.INFERENCE:
if not self.__inference(precision):
self._result.set_return_code(ReturnCode.MODEL_INFERENCE_FAILURE)
return False
else:
logger.warning(
'Model action has no implementation yet - model: {}, model_action: {}'.format(
self._name, model_action
)
)
return True
def __process_model_result(self, model_action, precision, step_times):
"""Function to process raw results and save the summarized results.
Args:
model_action (ModelAction): train or inference.
precision (Precision): precision of model and input data, such as float32, float16.
step_times (list): The step time list of every training/inference step, unit is millisecond.
"""
metric = 'steptime_{}_{}'.format(model_action.value, precision.value)
self._result.add_raw_data(metric, step_times)
avg = sum(step_times) / len(step_times)
self._result.add_result(metric, avg)
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
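# For example, a 2 ms step with batch_size=32 yields 1000 / 2 * 32 = 16000 samples/sec.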
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
metric = 'throughput_{}_{}'.format(model_action.value, precision.value)
self._result.add_raw_data(metric, throughput)
avg = sum(throughput) / len(throughput)
self._result.add_result(metric, avg)
@abstractmethod
def _cal_params_size(self):
"""Calculate the parameters scale of the model.
Return:
The count of trainable parameters.
"""
pass
def print_env_info(self):
"""Print environments or dependencies information."""
# TODO: will implement it when add real benchmarks in the future.
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Interfaces that provide access to benchmarks."""
from typing import Dict
from superbench.common.utils import logger
from superbench.common.errors import DuplicateBenchmarkRegistrationError
from superbench.benchmarks import Platform, Framework, BenchmarkContext
from superbench.benchmarks.base import Benchmark
class BenchmarkRegistry:
"""Class that minatains all benchmarks.
Provide the following functions:
Register new benchmark.
Get the internal benchmark name.
Check the validity of benchmark parameters.
Get all configurable settings of benchmark.
Launch one benchmark and return the result.
"""
benchmarks: Dict[str, dict] = dict()
@classmethod
def register_benchmark(cls, name, class_def, parameters=None, platform=None):
"""Register new benchmark, key is the benchmark name.
Args:
name (str): internal name of benchmark.
class_def (Benchmark): class object of benchmark.
parameters (str): predefined parameters of benchmark.
platform (Platform): Platform types like CUDA, ROCM.
"""
if not name or not isinstance(name, str):
logger.log_and_raise(
TypeError,
'Name of registered benchmark is not string - benchmark: {}, type: {}'.format(name, type(name))
)
if not issubclass(class_def, Benchmark):
logger.log_and_raise(
TypeError,
'Registered class is not subclass of Benchmark - benchmark: {}, type: {}'.format(name, type(class_def))
)
if name not in cls.benchmarks:
cls.benchmarks[name] = dict()
if platform:
if platform not in Platform:
platform_list = list(map(str, Platform))
logger.log_and_raise(
TypeError, 'Unknown platform - benchmark: {}, supported platforms: {}, but got: {}'.format(
name, platform_list, platform
)
)
if platform not in cls.benchmarks[name]:
cls.benchmarks[name][platform] = (class_def, parameters)
else:
logger.log_and_raise(
DuplicateBenchmarkRegistrationError,
'Duplicate registration - benchmark: {}, platform: {}'.format(name, platform)
)
else:
# If the platform is not specified, the benchmark
# is registered for all platforms.
for p in Platform:
if p not in cls.benchmarks[name]:
cls.benchmarks[name][p] = (class_def, parameters)
else:
logger.log_and_raise(
DuplicateBenchmarkRegistrationError, 'Duplicate registration - benchmark: {}'.format(name)
)
@classmethod
def is_benchmark_context_valid(cls, benchmark_context):
"""Check wether the benchmark context is valid or not.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
ret (bool): return True if context is valid.
"""
if isinstance(benchmark_context, BenchmarkContext) and benchmark_context.name:
return True
else:
logger.error('Benchmark has invalid context')
return False
@classmethod
def __get_benchmark_name(cls, benchmark_context):
"""Return the internal benchmark name.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
benchmark_name (str): internal benchmark name, None means context is invalid.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = benchmark_context.name
framework = benchmark_context.framework
if framework != Framework.NONE:
benchmark_name = framework.value + '-' + benchmark_name
return benchmark_name
@classmethod
def check_parameters(cls, benchmark_context):
"""Check the validation of customized parameters.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
Return True if benchmark exists and context/parameters are valid.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return False
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
customized_parameters = benchmark_context.parameters
if benchmark_name:
(benchmark_class, params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
benchmark = benchmark_class(benchmark_name, customized_parameters)
benchmark.add_parser_arguments()
ret, args, unknown = benchmark.parse_args()
if ret and len(unknown) < 1:
return True
return False
@classmethod
def get_benchmark_configurable_settings(cls, benchmark_context):
"""Get all configurable settings of benchmark.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
All configurable settings in raw string, None means context is invalid or no benchmark is found.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
(benchmark_class, predefine_params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
benchmark = benchmark_class(benchmark_name)
benchmark.add_parser_arguments()
return benchmark.get_configurable_settings()
else:
return None
@classmethod
def launch_benchmark(cls, benchmark_context):
"""Select and Launch benchmark.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
benchmark (Benchmark): the benchmark instance containing all results;
None means the context is invalid or no benchmark is found.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return None
benchmark_name = cls.__get_benchmark_name(benchmark_context)
benchmark = None
if benchmark_name:
platform = benchmark_context.platform
parameters = benchmark_context.parameters
(benchmark_class, predefine_params) = cls.__select_benchmark(benchmark_name, platform)
if benchmark_class:
if predefine_params:
parameters = predefine_params + ' ' + parameters
benchmark = benchmark_class(benchmark_name, parameters)
# Run the benchmark; the return code is recorded in the result object.
benchmark.run()
return benchmark
@classmethod
def is_benchmark_registered(cls, benchmark_context):
"""Check wether the benchmark is registered or not.
Args:
benchmark_context (BenchmarkContext): the benchmark context.
Return:
ret (bool): return True if context is valid and benchmark is registered.
"""
if not cls.is_benchmark_context_valid(benchmark_context):
return False
benchmark_name = cls.__get_benchmark_name(benchmark_context)
platform = benchmark_context.platform
if cls.benchmarks.get(benchmark_name, {}).get(platform) is None:
return False
return True
@classmethod
def __select_benchmark(cls, name, platform):
"""Select benchmark by name and platform.
Args:
name (str): internal name of benchmark.
platform (Platform): Platform type of benchmark.
Return:
benchmark_class (Benchmark): class object of benchmark.
predefine_params (str): predefined parameters which are set when registering the benchmark.
"""
if name not in cls.benchmarks or platform not in cls.benchmarks[name]:
logger.warning('Benchmark has no implementation, name: {}, platform: {}'.format(name, platform))
return (None, None)
(benchmark_class, predefine_params) = cls.benchmarks[name][platform]
return (benchmark_class, predefine_params)
@classmethod
def clean_benchmarks(cls):
"""Clean up the benchmark registry."""
cls.benchmarks.clear()
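'''
# Illustrative end-to-end flow ('CpuCopyBenchmark' is a hypothetical class):
BenchmarkRegistry.register_benchmark('cpu-copy', CpuCopyBenchmark, platform=Platform.CPU)
context = BenchmarkContext('cpu-copy', Platform.CPU, parameters='--run_count=2')
if BenchmarkRegistry.check_parameters(context):
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        print(benchmark.serialized_result)
'''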
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified result of benchmarks."""
import json
from enum import Enum
from superbench.common.utils import logger
class BenchmarkResult():
"""Result class of all benchmarks.
Defines the unified result format.
"""
def __init__(self, name, type, return_code, run_count=0):
"""Constructor.
Args:
name (str): name of benchmark.
type (BenchmarkType): type of benchmark.
return_code (ReturnCode): return code of benchmark.
run_count (int): run count of the benchmark; results of all runs will be organized as arrays.
"""
self.__name = name
self.__type = type
self.__run_count = run_count
self.__return_code = return_code
self.__start_time = None
self.__end_time = None
self.__raw_data = dict()
self.__result = dict()
def __eq__(self, rhs):
"""Override equal function for deep comparison.
Args:
rhs (BenchmarkResult): instance to compare.
Return:
True if two instances have all the same values for all the same attributes.
"""
return self.__dict__ == rhs.__dict__
def add_raw_data(self, metric, value):
"""Add raw benchmark data into result.
Args:
metric (str): metric name which is the key.
value (str or list): raw benchmark data.
For e2e model benchmarks, its type is list.
For micro-benchmarks or docker-benchmarks, its type is string.
Return:
True if the raw data is added successfully.
"""
if not metric or not isinstance(metric, str):
logger.error(
'metric name of benchmark is not string, name: {}, metric type: {}'.format(self.__name, type(metric))
)
return False
if metric not in self.__raw_data:
self.__raw_data[metric] = list()
self.__raw_data[metric].append(value)
return True
def add_result(self, metric, value):
"""Add summarized data into result.
Args:
metric (str): metric name which is the key.
value (float): summarized data.
For e2e model benchmarks, the value is step-time or throughput.
For micro-benchmarks, the value is FLOPS, bandwidth, etc.
Return:
True if the result is added successfully.
"""
if not metric or not isinstance(metric, str):
logger.error(
'metric name of benchmark is not string, name: {}, metric type: {}'.format(self.__name, type(metric))
)
return False
if metric not in self.__result:
self.__result[metric] = list()
self.__result[metric].append(value)
return True
def set_timestamp(self, start, end):
"""Set the start and end timestamp of benchmarking.
Args:
start (datetime): start timestamp of benchmarking.
end (datetime): end timestamp of benchmarking.
"""
self.__start_time = start
self.__end_time = end
def set_benchmark_type(self, benchmark_type):
"""Set the type of benchmark.
Args:
benchmark_type (BenchmarkType): type of benchmark, such as BenchmarkType.MODEL, BenchmarkType.MICRO.
"""
self.__type = benchmark_type
def set_return_code(self, return_code):
"""Set the return code.
Args:
return_code (ReturnCode): return code defined in superbench.benchmarks.ReturnCode.
"""
self.__return_code = return_code
def to_string(self):
"""Serialize the BenchmarkResult object to string.
Return:
The serialized string of BenchmarkResult object.
"""
formatted_obj = dict()
for key in self.__dict__:
# The name of internal member is like '_BenchmarkResult__name'.
# For the result object return to caller, just keep 'name'.
formatted_key = key.split('__')[1]
if isinstance(self.__dict__[key], Enum):
formatted_obj[formatted_key] = self.__dict__[key].value
else:
formatted_obj[formatted_key] = self.__dict__[key]
return json.dumps(formatted_obj)
@property
def name(self):
"""Decoration function to access __name."""
return self.__name
@property
def type(self):
"""Decoration function to access __type."""
return self.__type
@property
def run_count(self):
"""Decoration function to access __run_count."""
return self.__run_count
@property
def return_code(self):
"""Decoration function to access __return_code."""
return self.__return_code
@property
def start_time(self):
"""Decoration function to access __start_time."""
return self.__start_time
@property
def end_time(self):
"""Decoration function to access __end_time."""
return self.__end_time
@property
def raw_data(self):
"""Decoration function to access __raw_data."""
return self.__raw_data
@property
def result(self):
"""Decoration function to access __result."""
return self.__result
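'''
# Illustrative only: assembling and serializing a result by hand
# (BenchmarkType and ReturnCode imports are assumed here).
result = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
result.add_raw_data('metric1', 'raw log 1')
result.add_result('metric1', 300)
print(result.to_string())
'''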
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""A module for unified context of benchmarks."""
import enum
class Enum(enum.Enum):
"""Customized Enum class."""
@classmethod
def get_values(cls):
"""Return the value list."""
values = [item.value for item in cls]
return values
class ReturnCode(Enum):
"""The Enum class representing benchmark status."""
# Common return codes.
SUCCESS = 0
INVALID_ARGUMENT = 1
INVALID_BENCHMARK_TYPE = 2
INVALID_BENCHMARK_RESULT = 3
# Return codes related with model benchmarks.
NO_SUPPORTED_PRECISION = 10
MODEL_TRAIN_FAILURE = 11
MODEL_INFERENCE_FAILURE = 12
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exception types for SuperBench errors."""
class DuplicateBenchmarkRegistrationError(Exception):
"""An error is raised for duplicate benchmark registration."""
pass
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Exposes the interface of SuperBench common utilities."""
from .logging import logger
__all__ = ['logger']
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench loggin module."""
import socket
import logging
import sys
import io
class LoggerAdapter(logging.LoggerAdapter):
"""LoggerAdapter class which add customized function for log error and raise exception."""
def log_and_raise(self, exception, msg, *args):
"""Log error and raise exception.
Args:
exception (BaseException): Exception class.
msg (str): logging message.
args (dict): arguments dict for message.
"""
self.error(msg, *args)
raise exception(msg % args)
class Logger:
"""Logger class which creates logger instance."""
@staticmethod
def create_logger(name, level=logging.INFO, stream=sys.stdout):
"""Create logger instance with customized format.
Args:
name (str): project name.
level (int): logging level, default is INFO.
stream (TextIOBase): stream object, such as stdout or file object,
default is sys.stdout.
Return:
logger with the specified name, level and stream.
"""
is_level_valid = True
if level not in logging._levelToName.keys():
invalid_level = level
level = logging.INFO
is_level_valid = False
is_stream_valid = True
if not isinstance(stream, io.IOBase):
invalid_stream = stream
stream = sys.stdout
is_stream_valid = False
formatter = logging.Formatter(
'%(asctime)s - %(hostname)s - '
'%(filename)s:%(lineno)d - '
'%(levelname)s: %(message)s'
)
handler = logging.StreamHandler(stream=stream)
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(level)
logger.addHandler(handler)
logger = LoggerAdapter(logger, extra={'hostname': socket.gethostname()})
if not is_level_valid:
logger.error(
'Log level is invalid, replaced it with logging.INFO - level: {}, expected: {}'.format(
invalid_level, ' '.join(str(x) for x in logging._levelToName.keys())
)
)
if not is_stream_valid:
logger.error('Stream is invalid, replaced it with sys.stdout - stream type: {}'.format(type(invalid_stream)))
return logger
logger = Logger.create_logger('SuperBench', level=logging.INFO)
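'''
# Illustrative only: typical usage of the module-level logger.
logger.info('benchmark started - name: {}'.format('demo'))
# log_and_raise logs the message and then raises the given exception type.
logger.log_and_raise(ValueError, 'invalid value - got: %s', 'foo')
'''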
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkRegistry module."""
from superbench.benchmarks import Platform, Framework, Precision, \
BenchmarkContext, BenchmarkRegistry, BenchmarkType, ReturnCode
from superbench.benchmarks.model_benchmarks import ModelBenchmark
class FakeModelBenchmark(ModelBenchmark):
"""Fake benchmark inherit from ModelBenchmark."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name: benchmark name.
parameters: benchmark parameters.
"""
super().__init__(name, parameters)
self._supported_precision = [Precision.FLOAT32, Precision.FLOAT16]
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--hidden_size',
type=int,
default=1024,
required=False,
help='Hidden size',
)
self._parser.add_argument(
'--seq_len',
type=int,
default=512,
required=False,
help='Sequence length',
)
def _init_distributed_setting(self):
"""Initialize the distributed library and bind the worker to GPU."""
pass
def _generate_dataset(self):
"""Generate dataset for benchmarking according to shape info."""
pass
def _init_dataloader(self):
"""Initialize the distributed dataloader."""
pass
def _create_optimizer(self):
"""Create the optimzier instance used for training."""
pass
def _create_model(self, precision):
"""Construct the model for benchmarking."""
pass
def _train_step(self, precision):
"""Define the training process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The step-time list of every training step.
"""
duration = []
for i in range(self._args.num_steps):
duration.append(2)
return duration
def _inference_step(self, precision):
"""Define the inference process.
Args:
precision (Precision): precision of model and input data,
such as float32, float16.
Return:
The latency list of every inference operation.
"""
duration = []
for i in range(self._args.num_steps):
duration.append(4)
return duration
def _cal_params_size(self):
"""Calculate the parameters scale of the model.
Return:
The count of trainable parameters.
"""
return 200
def create_benchmark(params='--num_steps=8'):
"""Register and create benchmark."""
# Register the FakeModelBenchmark benchmark.
BenchmarkRegistry.register_benchmark(
'pytorch-fake-model',
FakeModelBenchmark,
parameters='--hidden_size=2',
platform=Platform.CUDA,
)
context = BenchmarkContext('fake-model', Platform.CUDA, parameters=params, framework=Framework.PYTORCH)
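# Access the private (name-mangled) registry helpers directly for testing.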
name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(context)
assert (name)
(benchmark_class, predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(name, context.platform)
assert (benchmark_class)
BenchmarkRegistry.clean_benchmarks()
return benchmark_class(name, predefine_params + ' ' + context.parameters)
def test_arguments_related_interfaces():
"""Test arguments related interfaces.
Benchmark.add_parser_arguments(),
Benchmark.parse_args(),
Benchmark.get_configurable_settings()
"""
# Positive case for parse_args().
benchmark = create_benchmark('--num_steps=9')
benchmark.add_parser_arguments()
(ret, args, unknown) = benchmark.parse_args()
assert (ret and args.num_steps == 9)
# Negative case for parse_args() - invalid precision.
benchmark = create_benchmark('--num_steps=8 --precision=fp32')
benchmark.add_parser_arguments()
(ret, args, unknown) = benchmark.parse_args()
assert (ret is False)
# Test get_configurable_settings().
settings = benchmark.get_configurable_settings()
expected_settings = (
"""optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--num_warmup int The number of warmup steps.
--num_steps int The number of test steps.
--batch_size int The batch size.
--precision Precision [Precision ...]
Model precision. E.g. float16 float32 float64 bfloat16
uint8 int8 int16 int32 int64.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--distributed_impl DistributedImpl
Distributed implementations. E.g. ddp mirrored
multiworkermirrored parameterserver horovod
--distributed_backend DistributedBackend
Distributed backends. E.g. nccl mpi gloo
--hidden_size int Hidden size
--seq_len int Sequence length"""
)
assert (settings == expected_settings)
def test_preprocess():
"""Test interface Benchmark._preprocess()."""
# Positive case for _preprocess().
benchmark = create_benchmark('--num_steps=8')
assert (benchmark._preprocess())
assert (benchmark.return_code == ReturnCode.SUCCESS)
settings = benchmark.get_configurable_settings()
expected_settings = (
"""optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--num_warmup int The number of warmup steps.
--num_steps int The number of test steps.
--batch_size int The batch size.
--precision Precision [Precision ...]
Model precision. E.g. float16 float32 float64 bfloat16
uint8 int8 int16 int32 int64.
--model_action ModelAction [ModelAction ...]
Benchmark model process. E.g. train inference.
--distributed_impl DistributedImpl
Distributed implementations. E.g. ddp mirrored
multiworkermirrored parameterserver horovod
--distributed_backend DistributedBackend
Distributed backends. E.g. nccl mpi gloo
--hidden_size int Hidden size
--seq_len int Sequence length"""
)
print(settings)
assert (settings == expected_settings)
# Negative case for _preprocess() - invalid precision.
benchmark = create_benchmark('--num_steps=8 --precision=fp32')
assert (benchmark._preprocess() is False)
assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
# Negative case for _preprocess() - invalid benchmark type.
benchmark = create_benchmark('--num_steps=8 --precision=float32')
benchmark._benchmark_type = Platform.CUDA
assert (benchmark._preprocess() is False)
assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_TYPE)
def test_train():
"""Test interface Benchmark.__train()."""
benchmark = create_benchmark()
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {"steptime_train_float32": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__train(Precision.FLOAT32))
assert (benchmark.serialized_result == expected_result)
# Step time list is empty (simulate training failure).
benchmark = create_benchmark('--num_steps=0')
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__train(Precision.FLOAT32) is False)
assert (benchmark.serialized_result == expected_result)
def test_inference():
"""Test interface Benchmark.__inference()."""
benchmark = create_benchmark()
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {"steptime_inference_float16": [[4, 4, 4, 4, 4, 4, 4, 4]], '
'"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"steptime_inference_float16": [4.0], "throughput_inference_float16": [8000.0]}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16))
assert (benchmark.serialized_result == expected_result)
# Step time list is empty (simulate inference failure).
benchmark = create_benchmark('--num_steps=0')
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {}, "result": {}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16) is False)
assert (benchmark.serialized_result == expected_result)
def test_benchmark():
"""Test interface Benchmark._benchmark()."""
# Positive case for _benchmark().
benchmark = create_benchmark()
benchmark._preprocess()
assert (benchmark._benchmark())
assert (benchmark.name == 'pytorch-fake-model')
assert (benchmark.type == BenchmarkType.MODEL)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
expected_raw_data = {
'steptime_train_float32': [[2, 2, 2, 2, 2, 2, 2, 2]],
'throughput_train_float32': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]],
'steptime_train_float16': [[2, 2, 2, 2, 2, 2, 2, 2]],
'throughput_train_float16': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]
}
assert (benchmark.raw_data == expected_raw_data)
expected_result = {
'steptime_train_float32': [2.0],
'throughput_train_float32': [16000.0],
'steptime_train_float16': [2.0],
'throughput_train_float16': [16000.0]
}
assert (benchmark.result == expected_result)
expected_serialized_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'"end_time": null, "raw_data": {"steptime_train_float32": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"steptime_train_float16": [[2, 2, 2, 2, 2, 2, 2, 2]], '
'"throughput_train_float16": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
'"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}}'
)
assert (benchmark.serialized_result == expected_serialized_result)
# Negative case for _benchmark() - no supported precision found.
benchmark = create_benchmark('--precision=int16')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.NO_SUPPORTED_PRECISION)
# Negative case for _benchmark() - model train failure, step time list is empty.
benchmark = create_benchmark('--num_steps=0')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.MODEL_TRAIN_FAILURE)
# Negative case for _benchmark() - model inference failure, step time list is empty.
benchmark = create_benchmark('--model_action=inference --num_steps=0')
assert (benchmark._preprocess())
assert (benchmark._benchmark() is False)
assert (benchmark.return_code == ReturnCode.MODEL_INFERENCE_FAILURE)
def test_check_result_format():
"""Test interface Benchmark.__check_result_format()."""
# Positive case for __check_result_format().
benchmark = create_benchmark()
benchmark._preprocess()
assert (benchmark._benchmark())
assert (benchmark._Benchmark__check_result_type())
assert (benchmark._Benchmark__check_summarized_result())
assert (benchmark._Benchmark__check_raw_data())
# Negative case for __check_result_format() - change List[int] to List[str].
benchmark._result._BenchmarkResult__result = {'metric1': ['2.0']}
assert (benchmark._Benchmark__check_summarized_result() is False)
# Negative case for __check_raw_data() - change List[List[int]] to List[List[str]].
benchmark._result._BenchmarkResult__raw_data = {'metric1': [['2.0']]}
assert (benchmark._Benchmark__check_raw_data() is False)
# Negative case for __check_raw_data() - invalid benchmark result.
assert (benchmark._Benchmark__check_result_format() is False)
assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkResult module."""
from superbench.benchmarks import BenchmarkContext, Platform, Framework
def test_benchmark_context():
"""Test BenchmarkContext class."""
context = BenchmarkContext('pytorch-bert-large', Platform.CUDA, 'batch_size=8', framework=Framework.PYTORCH)
assert (context.name == 'pytorch-bert-large')
assert (context.platform == Platform.CUDA)
assert (context.parameters == 'batch_size=8')
assert (context.framework == Framework.PYTORCH)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkRegistry module."""
import re
from superbench.benchmarks import Platform, Framework, BenchmarkType, BenchmarkContext, BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmark
class AccumulationBenchmark(MicroBenchmark):
"""Benchmark that do accumulation from lower_bound to upper_bound."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name: benchmark name.
parameters: benchmark parameters.
"""
super().__init__(name, parameters)
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--lower_bound',
type=int,
default=0,
required=False,
help='The lower bound for accumulation.',
)
self._parser.add_argument(
'--upper_bound',
type=int,
default=2,
required=False,
help='The upper bound for accumulation.',
)
def _benchmark(self):
"""Implementation for benchmarking."""
raw_data = []
result = 0
for i in range(self._args.lower_bound, self._args.upper_bound):
result += i
raw_data.append(str(result))
metric = 'accumulation_result'
self._result.add_raw_data(metric, ','.join(raw_data))
self._result.add_result(metric, result)
return True
def test_register_benchmark():
"""Test interface BenchmarkRegistry.register_benchmark()."""
# Register the benchmark for all platforms when no platform is specified.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
for platform in Platform:
context = BenchmarkContext('accumulation', platform)
assert (BenchmarkRegistry.is_benchmark_registered(context))
BenchmarkRegistry.clean_benchmarks()
# Register the benchmark only for the CUDA platform when platform=Platform.CUDA is specified.
BenchmarkRegistry.register_benchmark('accumulation-cuda', AccumulationBenchmark, platform=Platform.CUDA)
context = BenchmarkContext('accumulation-cuda', Platform.CUDA)
assert (BenchmarkRegistry.is_benchmark_registered(context))
context = BenchmarkContext('accumulation-cuda', Platform.ROCM)
assert (BenchmarkRegistry.is_benchmark_registered(context) is False)
BenchmarkRegistry.clean_benchmarks()
def test_is_benchmark_context_valid():
"""Test interface BenchmarkRegistry.is_benchmark_context_valid()."""
# Positive case.
context = BenchmarkContext('accumulation', Platform.CPU)
assert (BenchmarkRegistry.is_benchmark_context_valid(context))
# Negative case.
context = 'context'
assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
context = None
assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
def test_get_benchmark_name():
"""Test interface BenchmarkRegistry.get_benchmark_name()."""
# Register benchmarks for testing.
benchmark_names = ['accumulation', 'pytorch-accumulation', 'tf1-accumulation', 'onnx-accumulation']
for name in benchmark_names:
BenchmarkRegistry.register_benchmark(name, AccumulationBenchmark)
# Test benchmark name for different Frameworks.
benchmark_frameworks = [Framework.NONE, Framework.PYTORCH, Framework.TENSORFLOW1, Framework.ONNX]
for i in range(len(benchmark_names)):
context = BenchmarkContext('accumulation', Platform.CPU, framework=benchmark_frameworks[i])
name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(context)
assert (name == benchmark_names[i])
BenchmarkRegistry.clean_benchmarks()
def test_check_parameters():
"""Test interface BenchmarkRegistry.check_parameters()."""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
# Positive case.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1')
assert (BenchmarkRegistry.check_parameters(context))
# Negative case.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower=1')
assert (BenchmarkRegistry.check_parameters(context) is False)
BenchmarkRegistry.clean_benchmarks()
def test_get_benchmark_configurable_settings():
"""Test BenchmarkRegistry interface.
BenchmarkRegistry.get_benchmark_configurable_settings().
"""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
context = BenchmarkContext('accumulation', Platform.CPU)
settings = BenchmarkRegistry.get_benchmark_configurable_settings(context)
expected = """optional arguments:
--run_count int The run count of benchmark.
--duration int The elapsed time of benchmark in seconds.
--lower_bound int The lower bound for accumulation.
--upper_bound int The upper bound for accumulation."""
assert (settings == expected)
BenchmarkRegistry.clean_benchmarks()
def test_launch_benchmark():
"""Test interface BenchmarkRegistry.launch_benchmark()."""
# Register benchmarks for testing.
BenchmarkRegistry.register_benchmark(
'accumulation', AccumulationBenchmark, parameters='--upper_bound=5', platform=Platform.CPU
)
# Launch benchmark.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1')
if BenchmarkRegistry.check_parameters(context):
benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (benchmark.name == 'accumulation')
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (benchmark.raw_data == {'accumulation_result': ['1,3,6,10']})
assert (benchmark.result == {'accumulation_result': [10]})
# Replace the timestamp with null.
result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
expected = (
'{"name": "accumulation", "type": "micro", "run_count": 1, '
'"return_code": 0, "start_time": null, "end_time": null, '
'"raw_data": {"accumulation_result": ["1,3,6,10"]}, '
'"result": {"accumulation_result": [10]}}'
)
assert (result == expected)
# Launch benchmark with overridden parameters.
context = BenchmarkContext('accumulation', Platform.CPU, parameters='--lower_bound=1 --upper_bound=4')
if BenchmarkRegistry.check_parameters(context):
benchmark = BenchmarkRegistry.launch_benchmark(context)
assert (benchmark)
assert (benchmark.name == 'accumulation')
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert (benchmark.raw_data == {'accumulation_result': ['1,3,6']})
assert (benchmark.result == {'accumulation_result': [6]})
# Replace the timestamp with null.
result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
expected = (
'{"name": "accumulation", "type": "micro", "run_count": 1, '
'"return_code": 0, "start_time": null, "end_time": null, '
'"raw_data": {"accumulation_result": ["1,3,6"]}, '
'"result": {"accumulation_result": [6]}}'
)
assert (result == expected)
# Negative case - the benchmark is not registered under the pytorch framework.
context = BenchmarkContext(
'accumulation', Platform.CPU, parameters='--lower_bound=1 --upper_bound=4', framework=Framework.PYTORCH
)
assert (BenchmarkRegistry.check_parameters(context) is False)
BenchmarkRegistry.clean_benchmarks()
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for BenchmarkResult module."""
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.result import BenchmarkResult
def test_add_raw_data():
"""Test interface BenchmarkResult.add_raw_data()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.add_raw_data('metric1', 'raw log 1')
result.add_raw_data('metric1', 'raw log 2')
assert (result.raw_data['metric1'][0] == 'raw log 1')
assert (result.raw_data['metric1'][1] == 'raw log 2')
assert (result.type == BenchmarkType.MICRO.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
result = BenchmarkResult('model', BenchmarkType.MODEL.value, ReturnCode.SUCCESS.value)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
assert (result.raw_data['metric1'][0] == [1, 2, 3])
assert (result.raw_data['metric1'][1] == [4, 5, 6])
assert (result.type == BenchmarkType.MODEL.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
def test_add_result():
"""Test interface BenchmarkResult.add_result()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.add_result('metric1', 300)
result.add_result('metric1', 200)
assert (result.result['metric1'][0] == 300)
assert (result.result['metric1'][1] == 200)
def test_set_timestamp():
"""Test interface BenchmarkResult.set_timestamp()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
start_time = '2021-02-03 16:59:49'
end_time = '2021-02-03 17:00:08'
result.set_timestamp(start_time, end_time)
assert (result.start_time == start_time)
assert (result.end_time == end_time)
def test_set_benchmark_type():
"""Test interface BenchmarkResult.set_benchmark_type()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
result.set_benchmark_type(BenchmarkType.MICRO.value)
assert (result.type == BenchmarkType.MICRO.value)
def test_set_return_code():
"""Test interface BenchmarkResult.set_return_code()."""
result = BenchmarkResult('micro', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value)
assert (result.return_code == ReturnCode.SUCCESS.value)
result.set_return_code(ReturnCode.INVALID_ARGUMENT.value)
assert (result.return_code == ReturnCode.INVALID_ARGUMENT.value)
result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT.value)
assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT.value)
def test_serialize_deserialize():
"""Test serialization/deserialization and compare the results."""
# Result with one metric.
result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO.value, ReturnCode.SUCCESS.value, run_count=2)
result.add_result('metric1', 300)
result.add_result('metric1', 200)
result.add_result('metric2', 100)
result.add_raw_data('metric1', [1, 2, 3])
result.add_raw_data('metric1', [4, 5, 6])
result.add_raw_data('metric1', [7, 8, 9])
start_time = '2021-02-03 16:59:49'
end_time = '2021-02-03 17:00:08'
result.set_timestamp(start_time, end_time)
result.set_benchmark_type(BenchmarkType.MICRO.value)
expected = (
'{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
'"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
'"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
'"result": {"metric1": [300, 200], "metric2": [100]}}'
)
assert (result.to_string() == expected)