Unverified Commit 7838b6b1 authored by Yang Wang's avatar Yang Wang Committed by GitHub
Browse files

Runner - Support `pair-wise` pattern in `mpi` mode (#447)

* Extract pair-wise pattern from ib_validation
parent 6186146d
...@@ -463,4 +463,4 @@ Only available for `mpi` mode. ...@@ -463,4 +463,4 @@ Only available for `mpi` mode.
Available variables in formatted string includes: Available variables in formatted string includes:
+ `name` + `name`
* accepted values: `all-nodes` * accepted values: `all-nodes`, `pair-wise`
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
from superbench.common.utils.lazy_import import LazyImport from superbench.common.utils.lazy_import import LazyImport
from superbench.common.utils.process import run_command from superbench.common.utils.process import run_command
from superbench.common.utils.topo_aware import gen_topo_aware_config from superbench.common.utils.topo_aware import gen_topo_aware_config
from superbench.common.utils.gen_traffic_pattern_config import gen_tarffic_pattern_host_group from superbench.common.utils.gen_traffic_pattern_config import gen_pair_wise_config, gen_traffic_pattern_host_group
device_manager = LazyImport('superbench.common.utils.device_manager') device_manager = LazyImport('superbench.common.utils.device_manager')
...@@ -25,5 +25,6 @@ ...@@ -25,5 +25,6 @@
'rotate_dir', 'rotate_dir',
'run_command', 'run_command',
'gen_topo_aware_config', 'gen_topo_aware_config',
'gen_tarffic_pattern_host_group', 'gen_pair_wise_config',
'gen_traffic_pattern_host_group',
] ]
...@@ -22,6 +22,45 @@ def gen_all_nodes_config(n): ...@@ -22,6 +22,45 @@ def gen_all_nodes_config(n):
return config return config
def gen_pair_wise_config(n):
    """Generate the pair-wise participant config, one row per round.

    Pair-wise means that every participant is paired with every other participant exactly once.
    The schedule follows the circle method of a round-robin tournament, see
    https://en.wikipedia.org/wiki/Round-robin_tournament.
    If n is even, there are n-1 rounds, each with n/2 pairs of distinct participants.
    If n is odd, there are n rounds, each with (n-1)/2 pairs, and one participant sits out per round.
    In each round, participants are paired from both ends towards the middle as
    (begin, end), (begin+1, end-1), ...
    Then all participants except the first one shift left by one position and the step is repeated.

    Args:
        n (int): the number of participants.

    Returns:
        config (list): the generated config list, each item in the list is a str like "0,1;2,3".
            The list is empty when n is not positive, or when n is 1 because no pair can be formed.
    """
    config = []
    if n <= 0:
        logger.warning('n is not positive')
        return config
    candidates = list(range(n))
    # Pad with a fake participant (-1) if n is odd; whoever is paired with it sits the round out.
    if n % 2 == 1:
        candidates.append(-1)
    count = len(candidates)
    half = count // 2
    for _ in range(count - 1):
        # Walk the first half forwards and the second half backwards to pair both ends inwards.
        pairs = [
            '{},{}'.format(first, second)
            for first, second in zip(candidates[:half], reversed(candidates[half:]))
            if first != -1 and second != -1
        ]
        # A round without any real pair (only possible when n is 1) would yield an empty row,
        # which is not a valid "a,b;c,d" entry, so it is skipped.
        if pairs:
            config.append(';'.join(pairs))
        # Keep the first participant fixed and rotate the remaining ones left by one position.
        candidates = candidates[:1] + candidates[2:] + candidates[1:2]
    return config
def __convert_config_to_host_group(config, host_list): def __convert_config_to_host_group(config, host_list):
"""Convert config format to host node. """Convert config format to host node.
...@@ -45,7 +84,7 @@ def __convert_config_to_host_group(config, host_list): ...@@ -45,7 +84,7 @@ def __convert_config_to_host_group(config, host_list):
return host_groups return host_groups
def gen_tarffic_pattern_host_group(host_list, pattern): def gen_traffic_pattern_host_group(host_list, pattern):
"""Generate host group from specified traffic pattern. """Generate host group from specified traffic pattern.
Args: Args:
...@@ -59,6 +98,8 @@ def gen_tarffic_pattern_host_group(host_list, pattern): ...@@ -59,6 +98,8 @@ def gen_tarffic_pattern_host_group(host_list, pattern):
n = len(host_list) n = len(host_list)
if pattern.name == 'all-nodes': if pattern.name == 'all-nodes':
config = gen_all_nodes_config(n) config = gen_all_nodes_config(n)
elif pattern.name == 'pair-wise':
config = gen_pair_wise_config(n)
else: else:
logger.error('Unsupported traffic pattern: {}'.format(pattern.name)) logger.error('Unsupported traffic pattern: {}'.format(pattern.name))
host_group = __convert_config_to_host_group(config, host_list) host_group = __convert_config_to_host_group(config, host_list)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
from joblib import Parallel, delayed from joblib import Parallel, delayed
from omegaconf import ListConfig, OmegaConf from omegaconf import ListConfig, OmegaConf
from superbench.common.utils import SuperBenchLogger, logger, gen_tarffic_pattern_host_group from superbench.common.utils import SuperBenchLogger, logger, gen_traffic_pattern_host_group
from superbench.runner.ansible import AnsibleClient from superbench.runner.ansible import AnsibleClient
from superbench.benchmarks import ReduceType, Reducer from superbench.benchmarks import ReduceType, Reducer
from superbench.monitor import MonitorRecord from superbench.monitor import MonitorRecord
...@@ -451,7 +451,7 @@ def run(self): ...@@ -451,7 +451,7 @@ def run(self):
else: else:
with open(self._output_path / 'hostfile', 'r') as f: with open(self._output_path / 'hostfile', 'r') as f:
host_list = f.read().splitlines() host_list = f.read().splitlines()
pattern_hostx = gen_tarffic_pattern_host_group(host_list, mode.pattern) pattern_hostx = gen_traffic_pattern_host_group(host_list, mode.pattern)
for host_groups in pattern_hostx: for host_groups in pattern_hostx:
para_rc_list = Parallel(n_jobs=len(host_groups))( para_rc_list = Parallel(n_jobs=len(host_groups))(
delayed(self._run_proc) delayed(self._run_proc)
......
...@@ -5,26 +5,40 @@ ...@@ -5,26 +5,40 @@
import argparse import argparse
import unittest import unittest
from superbench.common.utils import gen_tarffic_pattern_host_group from superbench.common.utils import gen_traffic_pattern_host_group
class GenConfigTest(unittest.TestCase): class GenConfigTest(unittest.TestCase):
"""Test the utils for generating config.""" """Test the utils for generating config."""
def test_gen_tarffic_pattern_host_group(self): def test_gen_traffic_pattern_host_group(self):
"""Test the function of generating traffic pattern config from specified mode.""" """Test the function of generating traffic pattern config from specified mode."""
# test under 8 nodes # Test for all-nodes pattern
hostx = ['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7'] hostx = ['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7']
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser()
add_help=False,
usage=argparse.SUPPRESS,
allow_abbrev=False,
)
parser.add_argument( parser.add_argument(
'--name', '--name',
type=str, type=str,
default='all-nodes', default='all-nodes',
required=False,
) )
pattern, _ = parser.parse_known_args() pattern, _ = parser.parse_known_args()
expected_host_group = [[['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7']]] expected_host_group = [[['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7']]]
self.assertEqual(gen_tarffic_pattern_host_group(hostx, pattern), expected_host_group) self.assertEqual(gen_traffic_pattern_host_group(hostx, pattern), expected_host_group)
# Test for pair-wise pattern
parser = argparse.ArgumentParser()
parser.add_argument(
'--name',
type=str,
default='pair-wise',
)
pattern, _ = parser.parse_known_args()
expected_host_group = [
[['node0', 'node7'], ['node1', 'node6'], ['node2', 'node5'], ['node3', 'node4']],
[['node0', 'node1'], ['node2', 'node7'], ['node3', 'node6'], ['node4', 'node5']],
[['node0', 'node2'], ['node3', 'node1'], ['node4', 'node7'], ['node5', 'node6']],
[['node0', 'node3'], ['node4', 'node2'], ['node5', 'node1'], ['node6', 'node7']],
[['node0', 'node4'], ['node5', 'node3'], ['node6', 'node2'], ['node7', 'node1']],
[['node0', 'node5'], ['node6', 'node4'], ['node7', 'node3'], ['node1', 'node2']],
[['node0', 'node6'], ['node7', 'node5'], ['node1', 'node4'], ['node2', 'node3']]
]
self.assertEqual(gen_traffic_pattern_host_group(hostx, pattern), expected_host_group)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment