"vscode:/vscode.git/clone" did not exist on "f5e1ce5b9237edbc2e524ae9ebcb2452dc842937"
Unverified Commit ddbc51a1 authored by Yifan Xiong, committed by GitHub
Browse files

Bug bash - Fix bugs and refine log in single GPU benchmarks (#97)

Fix bugs and refine log in single GPU benchmarks:

* Fix none framework issue
* Fix empty parameter bug
* Remove missed mobilenet_v3 models
* Change benchmark registration log to debug level
* Add pid in logging
* Add missing benchmarks in default config
* Fix deprecated `logger.warn` call (replaced with `logger.warning`)
parent 03b41be1
...@@ -138,7 +138,7 @@ def run(self): ...@@ -138,7 +138,7 @@ def run(self):
'colorlog>=4.7.2', 'colorlog>=4.7.2',
'joblib>=1.0.1', 'joblib>=1.0.1',
'knack>=0.7.2', 'knack>=0.7.2',
'omegaconf>=2.0.6', 'omegaconf==2.0.6',
], ],
extras_require={ extras_require={
'dev': ['pre-commit>=2.10.0'], 'dev': ['pre-commit>=2.10.0'],
...@@ -151,6 +151,7 @@ def run(self): ...@@ -151,6 +151,7 @@ def run(self):
'pytest-cov>=2.11.1', 'pytest-cov>=2.11.1',
'pytest-subtests>=0.4.0', 'pytest-subtests>=0.4.0',
'pytest>=6.2.2', 'pytest>=6.2.2',
'types-pyyaml',
'vcrpy>=4.1.1', 'vcrpy>=4.1.1',
'yapf>=0.30.0', 'yapf>=0.30.0',
], ],
......
...@@ -142,14 +142,14 @@ def _inference_step(self, precision): ...@@ -142,14 +142,14 @@ def _inference_step(self, precision):
# Register CNN benchmarks. # Register CNN benchmarks.
# Reference: https://pytorch.org/vision/stable/models.html # Reference: https://pytorch.org/vision/0.8/models.html
# https://github.com/pytorch/vision/tree/master/torchvision/models # https://github.com/pytorch/vision/tree/v0.8.0/torchvision/models
MODELS = [ MODELS = [
'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'googlenet', 'inception_v3', 'mnasnet0_5', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'googlenet', 'inception_v3', 'mnasnet0_5',
'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'resnet18', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet34', 'resnet50', 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2', 'shufflenet_v2_x0_5',
'wide_resnet101_2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0', 'squeezenet1_0', 'squeezenet1_1', 'vgg11',
'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19' 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19'
] ]
for model in MODELS: for model in MODELS:
......
...@@ -77,7 +77,7 @@ def register_benchmark(cls, name, class_def, parameters='', platform=None): ...@@ -77,7 +77,7 @@ def register_benchmark(cls, name, class_def, parameters='', platform=None):
) )
else: else:
cls.benchmarks[name]['predefine_param'] = vars(args) cls.benchmarks[name]['predefine_param'] = vars(args)
logger.info('Benchmark registration - benchmark: {}, predefine_parameters: {}'.format(name, vars(args))) logger.debug('Benchmark registration - benchmark: {}, predefine_parameters: {}'.format(name, vars(args)))
@classmethod @classmethod
def is_benchmark_context_valid(cls, benchmark_context): def is_benchmark_context_valid(cls, benchmark_context):
......
...@@ -36,10 +36,12 @@ def add_handler(logger, stream=sys.stdout, filename=None, color=False): ...@@ -36,10 +36,12 @@ def add_handler(logger, stream=sys.stdout, filename=None, color=False):
filename (str, optional): The filename that file handler should use. Defaults to None. filename (str, optional): The filename that file handler should use. Defaults to None.
color (bool, optional): Colored format or not. Defaults to False. color (bool, optional): Colored format or not. Defaults to False.
""" """
formatter = logging.Formatter('[%(asctime)s %(hostname)s][%(filename)s:%(lineno)s][%(levelname)s] %(message)s') formatter = logging.Formatter(
'[%(asctime)s %(hostname)s:%(process)d][%(filename)s:%(lineno)s][%(levelname)s] %(message)s'
)
if color: if color:
formatter = colorlog.ColoredFormatter( formatter = colorlog.ColoredFormatter(
'[%(cyan)s%(asctime)s %(hostname)s%(reset)s]' '[%(cyan)s%(asctime)s %(hostname)s:%(process)d%(reset)s]'
'[%(blue)s%(filename)s:%(lineno)s%(reset)s]' '[%(blue)s%(filename)s:%(lineno)s%(reset)s]'
'[%(log_color)s%(levelname)s%(reset)s] %(message)s' '[%(log_color)s%(levelname)s%(reset)s] %(message)s'
) )
......
...@@ -2,6 +2,14 @@ ...@@ -2,6 +2,14 @@
superbench: superbench:
enable: null enable: null
benchmarks: benchmarks:
kernel-launch:
enable: true
gemm-flops:
enable: true
cudnn-function:
enable: true
cublas-function:
enable: true
matmul: matmul:
enable: true enable: true
modes: modes:
...@@ -11,8 +19,28 @@ superbench: ...@@ -11,8 +19,28 @@ superbench:
parallel: no parallel: no
frameworks: frameworks:
- pytorch - pytorch
gpt_models:
enable: true
modes:
- name: torch.distributed
proc_num: 8
node_num: all
frameworks:
- pytorch
models:
- gpt2-small
- gpt2-large
parameters: parameters:
num_steps: 2048 duration: 0
num_warmup: 16
num_steps: 128
batch_size: 4
precision:
- float32
- float16
model_action:
- train
- inference
bert_models: bert_models:
enable: true enable: true
modes: modes:
...@@ -26,8 +54,8 @@ superbench: ...@@ -26,8 +54,8 @@ superbench:
- bert-large - bert-large
parameters: parameters:
duration: 0 duration: 0
num_warmup: 64 num_warmup: 16
num_steps: 2048 num_steps: 128
batch_size: 16 batch_size: 16
precision: precision:
- float32 - float32
...@@ -35,3 +63,53 @@ superbench: ...@@ -35,3 +63,53 @@ superbench:
model_action: model_action:
- train - train
- inference - inference
lstm_models:
enable: true
modes:
- name: torch.distributed
proc_num: 8
node_num: all
frameworks:
- pytorch
models:
- lstm
parameters:
duration: 0
num_warmup: 16
num_steps: 128
batch_size: 128
precision:
- float32
- float16
model_action:
- train
- inference
cnn_models:
enable: true
modes:
- name: torch.distributed
proc_num: 8
node_num: all
frameworks:
- pytorch
models:
- resnet50
- resnet101
- resnet152
- densenet169
- densenet201
- vgg11
- vgg13
- vgg16
- vgg19
parameters:
duration: 0
num_warmup: 16
num_steps: 128
batch_size: 128
precision:
- float32
- float16
model_action:
- train
- inference
...@@ -79,6 +79,8 @@ def __get_arguments(self, parameters): ...@@ -79,6 +79,8 @@ def __get_arguments(self, parameters):
str: Command line arguments. str: Command line arguments.
""" """
argv = [] argv = []
if not parameters:
return ''
for name, val in parameters.items(): for name, val in parameters.items():
if val is None: if val is None:
continue continue
...@@ -127,7 +129,7 @@ def __create_benchmark_dir(self, benchmark_name): ...@@ -127,7 +129,7 @@ def __create_benchmark_dir(self, benchmark_name):
""" """
benchmark_output_dir = Path(self._output_dir, 'benchmarks', benchmark_name) benchmark_output_dir = Path(self._output_dir, 'benchmarks', benchmark_name)
if benchmark_output_dir.is_dir() and any(benchmark_output_dir.iterdir()): if benchmark_output_dir.is_dir() and any(benchmark_output_dir.iterdir()):
logger.warn('Benchmark output directory %s is not empty.', str(benchmark_output_dir)) logger.warning('Benchmark output directory %s is not empty.', str(benchmark_output_dir))
for i in itertools.count(start=1): for i in itertools.count(start=1):
backup_dir = benchmark_output_dir.with_name('{}.{}'.format(benchmark_name, i)) backup_dir = benchmark_output_dir.with_name('{}.{}'.format(benchmark_name, i))
if not backup_dir.is_dir(): if not backup_dir.is_dir():
...@@ -153,7 +155,7 @@ def exec(self): ...@@ -153,7 +155,7 @@ def exec(self):
benchmark_config = self._sb_benchmarks[benchmark_name] benchmark_config = self._sb_benchmarks[benchmark_name]
benchmark_results = {} benchmark_results = {}
self.__create_benchmark_dir(benchmark_name) self.__create_benchmark_dir(benchmark_name)
for framework in benchmark_config.frameworks or [Framework.NONE]: for framework in benchmark_config.frameworks or [Framework.NONE.value]:
if benchmark_name.endswith('_models'): if benchmark_name.endswith('_models'):
for model in benchmark_config.models: for model in benchmark_config.models:
log_suffix = 'model-benchmark {}: {}/{}'.format(benchmark_name, framework, model) log_suffix = 'model-benchmark {}: {}/{}'.format(benchmark_name, framework, model)
...@@ -165,7 +167,7 @@ def exec(self): ...@@ -165,7 +167,7 @@ def exec(self):
parameters=self.__get_arguments(benchmark_config.parameters) parameters=self.__get_arguments(benchmark_config.parameters)
) )
result = self.__exec_benchmark(context, log_suffix) result = self.__exec_benchmark(context, log_suffix)
if framework != Framework.NONE: if framework != Framework.NONE.value:
benchmark_results['{}/{}'.format(framework, model)] = result benchmark_results['{}/{}'.format(framework, model)] = result
else: else:
benchmark_results[model] = result benchmark_results[model] = result
...@@ -179,7 +181,7 @@ def exec(self): ...@@ -179,7 +181,7 @@ def exec(self):
parameters=self.__get_arguments(benchmark_config.parameters) parameters=self.__get_arguments(benchmark_config.parameters)
) )
result = self.__exec_benchmark(context, log_suffix) result = self.__exec_benchmark(context, log_suffix)
if framework != Framework.NONE: if framework != Framework.NONE.value:
benchmark_results[framework] = result benchmark_results[framework] = result
else: else:
benchmark_results = result benchmark_results = result
......
...@@ -61,14 +61,14 @@ def test_get_platform(self): ...@@ -61,14 +61,14 @@ def test_get_platform(self):
def test_get_arguments(self): def test_get_arguments(self):
"""Test benchmarks arguments.""" """Test benchmarks arguments."""
expected_matmul_args = '--num_steps 2048' expected_matmul_args = ''
self.assertEqual( self.assertEqual(
self.executor._SuperBenchExecutor__get_arguments( self.executor._SuperBenchExecutor__get_arguments(
self.default_config.superbench.benchmarks.matmul.parameters self.default_config.superbench.benchmarks.matmul.parameters
), expected_matmul_args ), expected_matmul_args
) )
expected_bert_models_args = \ expected_bert_models_args = \
'--duration 0 --num_warmup 64 --num_steps 2048 --batch_size 16 ' \ '--duration 0 --num_warmup 16 --num_steps 128 --batch_size 16 ' \
'--precision float32 float16 --model_action train inference' '--precision float32 float16 --model_action train inference'
self.assertEqual( self.assertEqual(
self.executor._SuperBenchExecutor__get_arguments( self.executor._SuperBenchExecutor__get_arguments(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment