Fix issues in ib loopback benchmark (#369)

Fix several issues in the ib loopback benchmark:
* use `--report_gbits` and divide by 8 to get GB/s; previous results were MiB/s divided by 1000
* use the ib_write_bw binary built in third_party instead of the one on the system path
* update the metric names so that different HCA indices share the same metric name

Fix issues in ib loopback benchmark (#369)
Fix several issues in the ib loopback benchmark:
* use `--report_gbits` and divide by 8 to get GB/s; previous results were MiB/s divided by 1000
* use the ib_write_bw binary built in third_party instead of the one on the system path
* update the metric names so that different HCA indices share the same metric name
620192a2 · Yifan Xiong · GitHub · 8ef7163a · 620192a2 · 620192a2
Unverified Commit 620192a2 authored Jun 30, 2022 by Yifan Xiong Committed by GitHub Jun 29, 2022
3 changed files
--- a/docs/user-tutorial/benchmarks/micro-benchmarks.md
+++ b/docs/user-tutorial/benchmarks/micro-benchmarks.md
@@ -221,10 +221,10 @@ Measure the InfiniBand loopback verbs bandwidth, performed by
 #### Metrics
 | Name                                | Unit             | Description                                                  |
-|---------------------------------------------|------------------|--------------------------------------------------------------|
+|-------------------------------------|------------------|--------------------------------------------------------------|
-| ib-loopback/ib_write_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. |
+| ib-loopback/ib_write_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. |
-| ib-loopback/ib_read_${msg_size}_ib[0-9]_bw  | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size.  |
+| ib-loopback/ib_read_bw_${msg_size}  | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size.  |
-| ib-loopback/ib_send_${msg_size}_ib[0-9]_bw  | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size.  |
+| ib-loopback/ib_send_bw_${msg_size}  | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size.  |
 ### `nccl-bw` / `rccl-bw`

--- a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py
@@ -161,12 +161,13 @@ def _preprocess(self):
                        server_core = int(numa_cores[-1])
                        client_core = int(numa_cores[-2])
                    command += ' ' + str(server_core) + ' ' + str(client_core)
-                    command += ' ' + self.__support_ib_commands[ib_command]
+                    command += ' ' + os.path.join(self._args.bin_dir, self.__support_ib_commands[ib_command])
                    command += command_mode + ' -F'
                    command += ' --iters=' + str(self._args.iters)
                    command += ' -d ' + network.get_ib_devices()[self._args.ib_index].split(':')[0]
                    command += ' -p ' + str(network.get_free_port())
                    command += ' -x ' + str(self._args.gid_index)
+                    command += ' --report_gbits'
                    self._commands.append(command)
                except BaseException as e:
                    self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE)
@@ -197,13 +198,13 @@ def _process_raw_result(self, cmd_idx, raw_output):
        metric_set = set()
        for line in content:
            try:
-                values = list(filter(None, line.split(' ')))
+                values = list(filter(None, line.split()))
                if len(values) != 5:
                    continue
                # Extract value from the line
                size = int(values[0])
-                avg_bw = float(values[-2]) / 1000
+                avg_bw = float(values[-2]) / 8.0
-                metric = 'ib_{}_{}_ib{}_bw'.format(self._args.commands[cmd_idx], size, str(self._args.ib_index))
+                metric = f'{self.__support_ib_commands[self._args.commands[cmd_idx]]}_{size}:{self._args.ib_index}'
                # Filter useless value in client output
                if metric not in metric_set:
                    metric_set.add(metric)

--- a/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
+++ b/tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
@@ -76,7 +76,8 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
        ret = benchmark._preprocess()
        assert (ret)
-        expect_command = 'run_perftest_loopback 3 1 ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0'
+        expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
+            '/ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
        command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
        assert (command == expect_command)
@@ -87,7 +88,7 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
        metric_list = []
        for ib_command in benchmark._args.commands:
            for size in ['8388608', '4194304', '1024', '2']:
-                metric = 'ib_{}_{}_ib{}_bw'.format(ib_command, size, str(benchmark._args.ib_index))
+                metric = 'ib_{}_bw_{}:{}'.format(ib_command, size, str(benchmark._args.ib_index))
                metric_list.append(metric)
        for metric in metric_list:
            assert (metric in benchmark.result)
@@ -145,7 +146,8 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
        ret = benchmark._preprocess()
        assert (ret)
-        expect_command = 'run_perftest_loopback 3 1 ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0'
+        expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
+            '/ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
        command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
        assert (command == expect_command)
@@ -155,7 +157,7 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
        # Positive case - valid raw output.
        metric_list = []
        for ib_command in benchmark._args.commands:
-            metric = 'ib_{}_8388608_ib{}_bw'.format(ib_command, str(benchmark._args.ib_index))
+            metric = 'ib_{}_bw_8388608:{}'.format(ib_command, str(benchmark._args.ib_index))
            metric_list.append(metric)
        for metric in metric_list:
            assert (metric in benchmark.result)