Unverified Commit 7838b6b1 authored by Yang Wang's avatar Yang Wang Committed by GitHub
Browse files

Runner - Support `pair-wise` pattern in `mpi` mode (#447)

* Extract pair-wise pattern from ib_validation
parent 6186146d
......@@ -463,4 +463,4 @@ Only available for `mpi` mode.
Available variables in the formatted string include:
+ `name`
* accepted values: `all-nodes`
* accepted values: `all-nodes`, `pair-wise`
......@@ -9,7 +9,7 @@
from superbench.common.utils.lazy_import import LazyImport
from superbench.common.utils.process import run_command
from superbench.common.utils.topo_aware import gen_topo_aware_config
from superbench.common.utils.gen_traffic_pattern_config import gen_tarffic_pattern_host_group
from superbench.common.utils.gen_traffic_pattern_config import gen_pair_wise_config, gen_traffic_pattern_host_group
device_manager = LazyImport('superbench.common.utils.device_manager')
......@@ -25,5 +25,6 @@
'rotate_dir',
'run_command',
'gen_topo_aware_config',
'gen_tarffic_pattern_host_group',
'gen_pair_wise_config',
'gen_traffic_pattern_host_group',
]
......@@ -22,6 +22,45 @@ def gen_all_nodes_config(n):
return config
def gen_pair_wise_config(n):
    """Generate pair-wise VM pairs config.

    Pair-wise means that each participant is paired with every other participant exactly once.
    The algorithm follows the circle method of a round-robin tournament, see
    https://en.wikipedia.org/wiki/Round-robin_tournament.
    If n is even, there are n-1 rounds in total, each with n/2 pairs of unique participants.
    If n is odd, there are n rounds, each with (n-1)/2 pairs, and one participant sits out per round.
    In each round, participants are paired from both ends towards the middle as
    (begin, end), (begin+1, end-1), ...
    Then all participants except the first one shift left by one position, and the pairing is repeated.

    Args:
        n (int): the number of participants.

    Returns:
        config (list): the generated config list, each item in the list is a str like "0,1;2,3".
            The list is empty when n is not positive or when n is 1 (no pair can be formed).
    """
    config = []
    if n <= 0:
        logger.warning('n is not positive')
        return config
    candidates = list(range(n))
    # Add a fake participant (-1) if n is odd so the circle method always works on an even count;
    # whoever is matched against the fake participant sits out that round.
    if n % 2 == 1:
        candidates.append(-1)
    count = len(candidates)
    half = count // 2
    for _ in range(count - 1):
        # Pair the i-th participant from the front with the i-th participant from the back.
        pairs = [
            '{},{}'.format(first, second)
            for first, second in zip(candidates[:half], reversed(candidates[half:]))
            if first != -1 and second != -1
        ]
        # With a single participant the only opponent is the fake one, which would yield an
        # empty row; skip it so every emitted row holds at least one real pair.
        if pairs:
            config.append(';'.join(pairs))
        # Keep the first participant fixed and rotate all the others left by one position.
        candidates = candidates[:1] + candidates[2:] + candidates[1:2]
    return config
def __convert_config_to_host_group(config, host_list):
"""Convert config format to host node.
......@@ -45,7 +84,7 @@ def __convert_config_to_host_group(config, host_list):
return host_groups
def gen_tarffic_pattern_host_group(host_list, pattern):
def gen_traffic_pattern_host_group(host_list, pattern):
"""Generate host group from specified traffic pattern.
Args:
......@@ -59,6 +98,8 @@ def gen_tarffic_pattern_host_group(host_list, pattern):
n = len(host_list)
if pattern.name == 'all-nodes':
config = gen_all_nodes_config(n)
elif pattern.name == 'pair-wise':
config = gen_pair_wise_config(n)
else:
logger.error('Unsupported traffic pattern: {}'.format(pattern.name))
host_group = __convert_config_to_host_group(config, host_list)
......
......@@ -14,7 +14,7 @@
from joblib import Parallel, delayed
from omegaconf import ListConfig, OmegaConf
from superbench.common.utils import SuperBenchLogger, logger, gen_tarffic_pattern_host_group
from superbench.common.utils import SuperBenchLogger, logger, gen_traffic_pattern_host_group
from superbench.runner.ansible import AnsibleClient
from superbench.benchmarks import ReduceType, Reducer
from superbench.monitor import MonitorRecord
......@@ -451,7 +451,7 @@ def run(self):
else:
with open(self._output_path / 'hostfile', 'r') as f:
host_list = f.read().splitlines()
pattern_hostx = gen_tarffic_pattern_host_group(host_list, mode.pattern)
pattern_hostx = gen_traffic_pattern_host_group(host_list, mode.pattern)
for host_groups in pattern_hostx:
para_rc_list = Parallel(n_jobs=len(host_groups))(
delayed(self._run_proc)
......
......@@ -5,26 +5,40 @@
import argparse
import unittest
from superbench.common.utils import gen_tarffic_pattern_host_group
from superbench.common.utils import gen_traffic_pattern_host_group
class GenConfigTest(unittest.TestCase):
"""Test the utils for generating config."""
def test_gen_tarffic_pattern_host_group(self):
def test_gen_traffic_pattern_host_group(self):
"""Test the function of generating traffic pattern config from specified mode."""
# test under 8 nodes
# Test for all-nodes pattern
hostx = ['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7']
parser = argparse.ArgumentParser(
add_help=False,
usage=argparse.SUPPRESS,
allow_abbrev=False,
)
parser = argparse.ArgumentParser()
parser.add_argument(
'--name',
type=str,
default='all-nodes',
required=False,
)
pattern, _ = parser.parse_known_args()
expected_host_group = [[['node0', 'node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7']]]
self.assertEqual(gen_tarffic_pattern_host_group(hostx, pattern), expected_host_group)
self.assertEqual(gen_traffic_pattern_host_group(hostx, pattern), expected_host_group)
# Test for pair-wise pattern
parser = argparse.ArgumentParser()
parser.add_argument(
'--name',
type=str,
default='pair-wise',
)
pattern, _ = parser.parse_known_args()
expected_host_group = [
[['node0', 'node7'], ['node1', 'node6'], ['node2', 'node5'], ['node3', 'node4']],
[['node0', 'node1'], ['node2', 'node7'], ['node3', 'node6'], ['node4', 'node5']],
[['node0', 'node2'], ['node3', 'node1'], ['node4', 'node7'], ['node5', 'node6']],
[['node0', 'node3'], ['node4', 'node2'], ['node5', 'node1'], ['node6', 'node7']],
[['node0', 'node4'], ['node5', 'node3'], ['node6', 'node2'], ['node7', 'node1']],
[['node0', 'node5'], ['node6', 'node4'], ['node7', 'node3'], ['node1', 'node2']],
[['node0', 'node6'], ['node7', 'node5'], ['node1', 'node4'], ['node2', 'node3']]
]
self.assertEqual(gen_traffic_pattern_host_group(hostx, pattern), expected_host_group)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment