Runner: validate MPI bind-to option and cover configurable bind-to in tests

655519cb · one · eea26d0d · 655519cb · 655519cb
Commit 655519cb authored Apr 18, 2026 by one
Show whitespace changes
Inline Side-by-side

Showing with 69 additions and 0 deletions

superbench/runner/runner.py superbench/runner/runner.py +10 -0

tests/runner/test_runner.py tests/runner/test_runner.py +59 -0

No files found.
--- a/superbench/runner/runner.py
+++ b/superbench/runner/runner.py
@@ -65,6 +65,14 @@ class SuperBenchRunner():
        """
        SuperBenchLogger.add_handler(logger.logger, filename=str(self._output_path / filename))

+    def __validate_mpi_bind_to(self, bind_to):
+        """Raise ValueError unless bind_to is one of the accepted mpi bind_to values."""
+        valid_mpi_bind_to = {'slot', 'hwthread', 'core', 'l1cache', 'l2cache', 'l3cache', 'package', 'numa', 'none'}
+        if bind_to not in valid_mpi_bind_to:
+            raise ValueError(
+                'Invalid bind_to value {}. Must be one of: {}'.format(bind_to, sorted(valid_mpi_bind_to))
+            )
+
    def __validate_sb_config(self):    # noqa: C901
        """Validate SuperBench config object.

@@ -98,6 +106,8 @@ class SuperBenchRunner():
                        }
                    if 'bind_to' not in mode:
                        self._sb_benchmarks[name].modes[idx].bind_to = 'numa'
+                    else:
+                        self.__validate_mpi_bind_to(mode.bind_to)
                    for key in ['PATH', 'LD_LIBRARY_PATH', 'SB_MICRO_PATH', 'SB_WORKSPACE']:
                        self._sb_benchmarks[name].modes[idx].env.setdefault(key, None)
                    if 'pattern' in mode:

--- a/tests/runner/test_runner.py
+++ b/tests/runner/test_runner.py
@@ -56,6 +56,8 @@ class RunnerTestCase(unittest.TestCase):
                    self.assertIn('proc_num', mode)
                if mode.name == 'mpi':
                    self.assertIn('mca', mode)
+                    self.assertIn('bind_to', mode)
+                    self.assertEqual('numa', mode.bind_to)

    def test_get_failure_count(self):
        """Test get_failure_count."""
@@ -153,6 +155,7 @@ class RunnerTestCase(unittest.TestCase):
                    'name': 'mpi',
                    'proc_num': 8,
                    'proc_rank': 1,
+                    'bind_to': 'numa',
                    'mca': {},
                    'env': {
                        'PATH': None,
@@ -172,6 +175,7 @@ class RunnerTestCase(unittest.TestCase):
                    'name': 'mpi',
                    'proc_num': 8,
                    'proc_rank': 2,
+                    'bind_to': 'numa',
                    'mca': {
                        'coll_hcoll_enable': 0,
                    },
@@ -196,6 +200,7 @@ class RunnerTestCase(unittest.TestCase):
                    'node_num': 1,
                    'proc_num': 8,
                    'proc_rank': 2,
+                    'bind_to': 'numa',
                    'mca': {
                        'coll_hcoll_enable': 0,
                    },
@@ -240,6 +245,7 @@ class RunnerTestCase(unittest.TestCase):
                    'name': 'mpi',
                    'proc_num': 8,
                    'proc_rank': 1,
+                    'bind_to': 'numa',
                    'mca': {},
                    'pattern': {
                        'type': 'all-nodes',
@@ -255,6 +261,44 @@ class RunnerTestCase(unittest.TestCase):
                    f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo'
                ),
            },
+            {
+                'benchmark_name':
+                'foo',
+                'mode': {
+                    'name': 'mpi',
+                    'proc_num': 8,
+                    'proc_rank': 0,
+                    'bind_to': 'core',
+                    'mca': {},
+                    'env': {
+                        'PATH': None,
+                    },
+                },
+                'expected_command': (
+                    'mpirun -tag-output -allow-run-as-root -hostfile hostfile -map-by ppr:8:node -bind-to core '
+                    ' -x PATH '
+                    f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo'
+                ),
+            },
+            {
+                'benchmark_name':
+                'foo',
+                'mode': {
+                    'name': 'mpi',
+                    'proc_num': 8,
+                    'proc_rank': 0,
+                    'bind_to': 'none',
+                    'mca': {},
+                    'env': {
+                        'PATH': None,
+                    },
+                },
+                'expected_command': (
+                    'mpirun -tag-output -allow-run-as-root -hostfile hostfile -map-by ppr:8:node -bind-to none '
+                    ' -x PATH '
+                    f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo'
+                ),
+            },
        ]

        for test_case in test_cases:
@@ -285,6 +329,21 @@ class RunnerTestCase(unittest.TestCase):
                    ), expected_command
                )

+    def test_validate_sb_config_invalid_mpi_bind_to(self):
+        """Test that building a runner with an unsupported mpi bind_to value raises ValueError."""
+        test_config_file = Path(__file__).parent / '../../tests/data/test.yaml'
+        with test_config_file.open() as fp:
+            invalid_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader))
+        invalid_config.superbench.benchmarks['nccl-bw:all-nodes'].modes[0].bind_to = 'socket'
+
+        with self.assertRaisesRegex(ValueError, 'Invalid bind_to value'):
+            SuperBenchRunner(
+                invalid_config,
+                OmegaConf.create({}),
+                OmegaConf.create({}),
+                self.sb_output_dir,
+            )
+
    def test_run_empty_benchmarks(self):
        """Test run empty benchmarks, nothing should happen."""
        self.runner._sb_enabled_benchmarks = []