Unverified Commit 9a3ce39d authored by Yang Wang, committed by GitHub

Update omegaconf version to 2.3.0 (#631)

Update the `omegaconf` pin to
[2.3.0](https://pypi.org/project/omegaconf/2.3.0/): omegaconf 2.0.6
declares the non-standard dependency specifier `PyYAML>=5.1.*`, which
pip 24.1 will start rejecting when it enforces this behaviour change.
Discussion can be found at https://github.com/pypa/pip/issues/12063.
parent 7435f10a
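For context, a minimal sketch of the failure, assuming `packaging` 22 or newer (the library pip vendors for specifier parsing); older releases only emitted a deprecation warning for the same input:

```python
from packaging.specifiers import InvalidSpecifier, SpecifierSet

# A standard PEP 440 specifier parses fine.
print(SpecifierSet('>=5.1'))

# The '.*' suffix is only valid with == and !=, so '>=5.1.*' is
# rejected by the stricter parser that pip 24.1 enforces.
try:
    SpecifierSet('>=5.1.*')
except InvalidSpecifier as exc:
    print(f'rejected: {exc}')
```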
@@ -164,7 +164,7 @@ def run(self):
         'natsort>=7.1.1',
         'networkx>=2.5',
         'numpy>=1.19.2',
-        'omegaconf==2.0.6',
+        'omegaconf==2.3.0',
         'openpyxl>=3.0.7',
         'packaging>=21.0',
         'pandas>=1.1.5',
@@ -198,7 +198,7 @@ def run(self):
         'pydocstyle>=5.1.1',
         'pytest-cov>=2.11.1',
         'pytest-subtests>=0.4.0',
-        'pytest>=6.2.2',
+        'pytest>=6.2.2, <=7.4.4',
         'types-markdown',
         'types-pkg_resources',
         'types-pyyaml',
...
@@ -71,13 +71,13 @@ def __get_enabled_benchmarks(self):
         Return:
             list: List of benchmarks which will be executed.
         """
-        if self._sb_config.superbench.enable:
+        if 'enable' in self._sb_config.superbench and self._sb_config.superbench.enable:
             if isinstance(self._sb_config.superbench.enable, str):
                 return [self._sb_config.superbench.enable]
             elif isinstance(self._sb_config.superbench.enable, (list, ListConfig)):
                 return list(self._sb_config.superbench.enable)
         # TODO: may exist order issue
-        return [k for k, v in self._sb_benchmarks.items() if v.enable]
+        return [k for k, v in self._sb_benchmarks.items() if 'enable' in v and v.enable]
 
     def __get_platform(self):
         """Detect runninng platform by environment."""
@@ -228,32 +228,37 @@ def exec(self):
                     logger.warning('Monitor can not support CPU platform.')
 
             benchmark_real_name = benchmark_name.split(':')[0]
-            for framework in benchmark_config.frameworks or [Framework.NONE.value]:
-                if benchmark_real_name == 'model-benchmarks' or (
-                    ':' not in benchmark_name and benchmark_name.endswith('_models')
-                ):
-                    for model in benchmark_config.models:
-                        full_name = f'{benchmark_name}/{framework}-{model}'
-                        logger.info('Executor is going to execute %s.', full_name)
-                        context = BenchmarkRegistry.create_benchmark_context(
-                            model,
-                            platform=self.__get_platform(),
-                            framework=Framework(framework.lower()),
-                            parameters=self.__get_arguments(benchmark_config.parameters)
-                        )
-                        result = self.__exec_benchmark(full_name, context)
-                        benchmark_results.append(result)
-                else:
-                    full_name = benchmark_name
-                    logger.info('Executor is going to execute %s.', full_name)
-                    context = BenchmarkRegistry.create_benchmark_context(
-                        benchmark_real_name,
-                        platform=self.__get_platform(),
-                        framework=Framework(framework.lower()),
-                        parameters=self.__get_arguments(benchmark_config.parameters)
-                    )
-                    result = self.__exec_benchmark(full_name, context)
-                    benchmark_results.append(result)
+            if 'frameworks' in benchmark_config:
+                for framework in benchmark_config.frameworks or [Framework.NONE.value]:
+                    if benchmark_real_name == 'model-benchmarks' or (
+                        ':' not in benchmark_name and benchmark_name.endswith('_models')
+                    ):
+                        for model in benchmark_config.models:
+                            full_name = f'{benchmark_name}/{framework}-{model}'
+                            logger.info('Executor is going to execute %s.', full_name)
+                            context = BenchmarkRegistry.create_benchmark_context(
+                                model,
+                                platform=self.__get_platform(),
+                                framework=Framework(framework.lower()),
+                                parameters=self.__get_arguments(
+                                    {} if 'parameters' not in benchmark_config else benchmark_config.parameters
+                                )
+                            )
+                            result = self.__exec_benchmark(full_name, context)
+                            benchmark_results.append(result)
+                    else:
+                        full_name = benchmark_name
+                        logger.info('Executor is going to execute %s.', full_name)
+                        context = BenchmarkRegistry.create_benchmark_context(
+                            benchmark_real_name,
+                            platform=self.__get_platform(),
+                            framework=Framework(framework.lower()),
+                            parameters=self.__get_arguments(
+                                {} if 'parameters' not in benchmark_config else benchmark_config.parameters
+                            )
+                        )
+                        result = self.__exec_benchmark(full_name, context)
+                        benchmark_results.append(result)
 
             if monitor:
                 monitor.stop()
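As an aside, the repeated `{} if 'parameters' not in benchmark_config else benchmark_config.parameters` guard could also be expressed with `DictConfig.get`, which takes a default; a sketch assuming the config is not in struct mode (contents invented):

```python
from omegaconf import OmegaConf

benchmark_config = OmegaConf.create({'models': ['resnet50']})    # no 'parameters' key

# Equivalent to: {} if 'parameters' not in benchmark_config else benchmark_config.parameters
parameters = benchmark_config.get('parameters', {})
print(parameters)    # {}
```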
...
@@ -67,24 +67,24 @@ def __validate_sb_config(self):    # noqa: C901
             InvalidConfigError: If input config is invalid.
         """
         # TODO: add validation and defaulting
-        if not self._sb_config.superbench.env:
+        if 'env' not in self._sb_config.superbench:
             self._sb_config.superbench.env = {}
         for name in self._sb_benchmarks:
-            if not self._sb_benchmarks[name].modes:
+            if 'modes' not in self._sb_benchmarks[name]:
                 self._sb_benchmarks[name].modes = []
             for idx, mode in enumerate(self._sb_benchmarks[name].modes):
-                if not mode.env:
+                if 'env' not in mode:
                     self._sb_benchmarks[name].modes[idx].env = {}
                 if mode.name == 'local':
-                    if not mode.proc_num:
+                    if 'proc_num' not in mode:
                         self._sb_benchmarks[name].modes[idx].proc_num = 1
-                    if not mode.prefix:
+                    if 'prefix' not in mode:
                         self._sb_benchmarks[name].modes[idx].prefix = ''
                 elif mode.name == 'torch.distributed':
-                    if not mode.proc_num:
+                    if 'proc_num' not in mode:
                         self._sb_benchmarks[name].modes[idx].proc_num = 8
                 elif mode.name == 'mpi':
-                    if not mode.mca:
+                    if 'machinefile' not in mode:
                         self._sb_benchmarks[name].modes[idx].mca = {
                             'pml': 'ob1',
                             'btl': '^openib',
@@ -93,8 +93,8 @@ def __validate_sb_config(self):    # noqa: C901
                         }
                 for key in ['PATH', 'LD_LIBRARY_PATH', 'SB_MICRO_PATH', 'SB_WORKSPACE']:
                     self._sb_benchmarks[name].modes[idx].env.setdefault(key, None)
-                if mode.pattern:
-                    if mode.pattern.type == 'topo-aware' and not mode.pattern.ibstat:
+                if 'pattern' in mode:
+                    if mode.pattern.type == 'topo-aware' and 'ibstat' not in mode.pattern:
                         self._sb_benchmarks[name].modes[idx].pattern.ibstat = gen_ibstat(
                             self._ansible_config, str(self._output_path / 'ibstate_file.txt')
                         )
@@ -105,12 +105,12 @@ def __get_enabled_benchmarks(self):
         Return:
             list: List of benchmarks which will be executed.
         """
-        if self._sb_config.superbench.enable:
+        if 'enable' in self._sb_config.superbench and self._sb_config.superbench.enable:
            if isinstance(self._sb_config.superbench.enable, str):
                return [self._sb_config.superbench.enable]
            elif isinstance(self._sb_config.superbench.enable, (list, ListConfig)):
                return list(self._sb_config.superbench.enable)
-        return [k for k, v in self._sb_benchmarks.items() if v.enable]
+        return [k for k, v in self._sb_benchmarks.items() if 'enable' in v and v.enable]
 
     def __get_mode_command(self, benchmark_name, mode, timeout=None):
         """Get runner command for given mode.
@@ -141,7 +141,7 @@ def __get_mode_command(self, benchmark_name, mode, timeout=None):
         elif mode.name == 'torch.distributed':
             # TODO: replace with torch.distributed.run in v1.9
             # TODO: only supports node_num=1 and node_num=all currently
-            torch_dist_params = '' if mode.node_num == 1 else \
+            torch_dist_params = '' if 'node_num' in mode and mode.node_num == 1 else \
                 '--nnodes=$NNODES --node_rank=$NODE_RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
             mode_command = (
                 f'torchrun'
...@@ -158,8 +158,8 @@ def __get_mode_command(self, benchmark_name, mode, timeout=None): ...@@ -158,8 +158,8 @@ def __get_mode_command(self, benchmark_name, mode, timeout=None):
'-bind-to numa ' # bind processes to numa '-bind-to numa ' # bind processes to numa
'{mca_list} {env_list} {command}' '{mca_list} {env_list} {command}'
).format( ).format(
host_list=f'-host localhost:{mode.proc_num}' if mode.node_num == 1 else host_list=f'-host localhost:{mode.proc_num}' if 'node_num' in mode and mode.node_num == 1 else
f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node' if mode.host_list is None else '-host ' + f'-hostfile hostfile -map-by ppr:{mode.proc_num}:node' if 'host_list' not in mode else '-host ' +
','.join(f'{host}:{mode.proc_num}' for host in mode.host_list), ','.join(f'{host}:{mode.proc_num}' for host in mode.host_list),
mca_list=' '.join(f'-mca {k} {v}' for k, v in mode.mca.items()), mca_list=' '.join(f'-mca {k} {v}' for k, v in mode.mca.items()),
env_list=' '.join( env_list=' '.join(
@@ -206,6 +206,9 @@ def run_sys_info(self):
         logger.info('Runner is going to get node system info.')
         fcmd = "docker exec sb-workspace bash -c '{command}'"
+        if 'skip' not in self._docker_config:
+            self._docker_config.skip = False
         if self._docker_config.skip:
             fcmd = "bash -c 'cd $SB_WORKSPACE && {command}'"
         ansible_runner_config = self._ansible_client.get_shell_config(
@@ -225,7 +228,7 @@ def check_env(self):    # pragma: no cover
             self._ansible_client.get_playbook_config(
                 'check_env.yaml',
                 extravars={
-                    'no_docker': bool(self._docker_config.skip),
+                    'no_docker': False if 'skip' not in self._docker_config else self._docker_config.skip,
                     'output_dir': str(self._output_path),
                     'env': '\n'.join(f'{k}={v}' for k, v in self._sb_config.superbench.env.items()),
                 }
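`run_sys_info` above defaults `skip` by writing `False` back into the docker config, `check_env` inlines the same fallback, and `_run_proc` below repeats the write-back. A hypothetical helper (not in the repo) that centralizes the read without mutating the config, again assuming a non-struct config:

```python
def docker_skip(docker_config):
    """Hypothetical helper: read docker.skip with a False default, without mutating the config."""
    return bool(docker_config.get('skip', False))

# Usage sketch at the call sites:
#   fcmd = "bash -c '...'" if docker_skip(self._docker_config) else "docker exec ..."
#   'no_docker': docker_skip(self._docker_config),
```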
@@ -441,15 +444,17 @@ def _run_proc(self, benchmark_name, mode, vars):
             int: Process return code.
         """
         mode.update(vars)
-        if mode.name == 'mpi' and mode.pattern:
+        if mode.name == 'mpi' and 'pattern' in mode:
             mode.env.update({'SB_MODE_SERIAL_INDEX': mode.serial_index, 'SB_MODE_PARALLEL_INDEX': mode.parallel_index})
         logger.info('Runner is going to run %s in %s mode, proc rank %d.', benchmark_name, mode.name, mode.proc_rank)
 
-        timeout = self._sb_benchmarks[benchmark_name].timeout
+        timeout = self._sb_benchmarks[benchmark_name].get('timeout', 60)
         if isinstance(timeout, int):
             timeout = max(timeout, 60)
 
         env_list = '--env-file /tmp/sb.env'
+        if 'skip' not in self._docker_config:
+            self._docker_config.skip = False
         if self._docker_config.skip:
             env_list = 'set -o allexport && source /tmp/sb.env && set +o allexport'
             for k, v in mode.env.items():
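The `timeout` read above switches to `DictConfig.get` with a 60-second default, and the existing `max(timeout, 60)` clamp then raises any smaller integer to one minute. A small sketch of the combined behavior (values invented):

```python
from omegaconf import OmegaConf

for raw in ({}, {'timeout': 30}, {'timeout': 300}):
    bench = OmegaConf.create(raw)
    timeout = bench.get('timeout', 60)    # 60 when the key is absent
    if isinstance(timeout, int):
        timeout = max(timeout, 60)        # clamp configured values up to one minute
    print(raw, '->', timeout)             # {} -> 60, 30 -> 60, 300 -> 300
```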
@@ -463,7 +468,7 @@ def _run_proc(self, benchmark_name, mode, vars):
         ansible_runner_config = self._ansible_client.get_shell_config(
             fcmd.format(env_list=env_list, command=self.__get_mode_command(benchmark_name, mode, timeout))
         )
-        if mode.name == 'mpi' and mode.node_num != 1:
+        if mode.name == 'mpi' and 'node_num' in mode and mode.node_num != 1:
             ansible_runner_config = self._ansible_client.update_mpi_config(ansible_runner_config)
 
         if isinstance(timeout, int):
@@ -495,7 +500,7 @@ def run(self):
                         )
                         ansible_rc = sum(rc_list)
                     elif mode.name == 'torch.distributed' or mode.name == 'mpi':
-                        if not mode.pattern:
+                        if 'pattern' not in mode:
                             ansible_rc = self._run_proc(benchmark_name, mode, {'proc_rank': 0})
                         else:
                             if not os.path.exists(self._output_path / 'hostfile'):
...
@@ -44,7 +44,7 @@ def test_set_logger(self):
     def test_get_enabled_benchmarks_enable_none(self):
         """Test enabled benchmarks when superbench.enable is none."""
         benchmarks = self.default_config.superbench.benchmarks
-        expected_enabled_benchmarks = [x for x in benchmarks if benchmarks[x]['enable']]
+        expected_enabled_benchmarks = [x for x in benchmarks if 'enable' in benchmarks[x] and benchmarks[x]['enable']]
         self.assertListEqual(self.executor._sb_enabled, expected_enabled_benchmarks)
 
     def test_get_enabled_benchmarks_enable_str(self):
...