Unverified Commit 620192a2 authored by Yifan Xiong, committed by GitHub
Browse files

Fix issues in ib loopback benchmark (#369)

Fix several issues in ib loopback benchmark:
* use `--report_gbits` and divide by 8 to get GB/s; previous results were
  MiB/s / 1000
* use the ib_write_bw binary built in third_party instead of system path
* update the metric names so that different HCA indices share the same metric name
parent 8ef7163a
...@@ -221,10 +221,10 @@ Measure the InfiniBand loopback verbs bandwidth, performed by ...@@ -221,10 +221,10 @@ Measure the InfiniBand loopback verbs bandwidth, performed by
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|---------------------------------------------|------------------|--------------------------------------------------------------| |-------------------------------------|------------------|--------------------------------------------------------------|
| ib-loopback/ib_write_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. | | ib-loopback/ib_write_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. |
| ib-loopback/ib_read_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size. | | ib-loopback/ib_read_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size. |
| ib-loopback/ib_send_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size. | | ib-loopback/ib_send_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size. |
### `nccl-bw` / `rccl-bw` ### `nccl-bw` / `rccl-bw`
......
...@@ -161,12 +161,13 @@ def _preprocess(self): ...@@ -161,12 +161,13 @@ def _preprocess(self):
server_core = int(numa_cores[-1]) server_core = int(numa_cores[-1])
client_core = int(numa_cores[-2]) client_core = int(numa_cores[-2])
command += ' ' + str(server_core) + ' ' + str(client_core) command += ' ' + str(server_core) + ' ' + str(client_core)
command += ' ' + self.__support_ib_commands[ib_command] command += ' ' + os.path.join(self._args.bin_dir, self.__support_ib_commands[ib_command])
command += command_mode + ' -F' command += command_mode + ' -F'
command += ' --iters=' + str(self._args.iters) command += ' --iters=' + str(self._args.iters)
command += ' -d ' + network.get_ib_devices()[self._args.ib_index].split(':')[0] command += ' -d ' + network.get_ib_devices()[self._args.ib_index].split(':')[0]
command += ' -p ' + str(network.get_free_port()) command += ' -p ' + str(network.get_free_port())
command += ' -x ' + str(self._args.gid_index) command += ' -x ' + str(self._args.gid_index)
command += ' --report_gbits'
self._commands.append(command) self._commands.append(command)
except BaseException as e: except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE) self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE)
...@@ -197,13 +198,13 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -197,13 +198,13 @@ def _process_raw_result(self, cmd_idx, raw_output):
metric_set = set() metric_set = set()
for line in content: for line in content:
try: try:
values = list(filter(None, line.split(' '))) values = list(filter(None, line.split()))
if len(values) != 5: if len(values) != 5:
continue continue
# Extract value from the line # Extract value from the line
size = int(values[0]) size = int(values[0])
avg_bw = float(values[-2]) / 1000 avg_bw = float(values[-2]) / 8.0
metric = 'ib_{}_{}_ib{}_bw'.format(self._args.commands[cmd_idx], size, str(self._args.ib_index)) metric = f'{self.__support_ib_commands[self._args.commands[cmd_idx]]}_{size}:{self._args.ib_index}'
# Filter useless value in client output # Filter useless value in client output
if metric not in metric_set: if metric not in metric_set:
metric_set.add(metric) metric_set.add(metric)
......
...@@ -76,7 +76,8 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core ...@@ -76,7 +76,8 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
ret = benchmark._preprocess() ret = benchmark._preprocess()
assert (ret) assert (ret)
expect_command = 'run_perftest_loopback 3 1 ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0' expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
'/ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command) assert (command == expect_command)
...@@ -87,7 +88,7 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core ...@@ -87,7 +88,7 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
metric_list = [] metric_list = []
for ib_command in benchmark._args.commands: for ib_command in benchmark._args.commands:
for size in ['8388608', '4194304', '1024', '2']: for size in ['8388608', '4194304', '1024', '2']:
metric = 'ib_{}_{}_ib{}_bw'.format(ib_command, size, str(benchmark._args.ib_index)) metric = 'ib_{}_bw_{}:{}'.format(ib_command, size, str(benchmark._args.ib_index))
metric_list.append(metric) metric_list.append(metric)
for metric in metric_list: for metric in metric_list:
assert (metric in benchmark.result) assert (metric in benchmark.result)
...@@ -145,7 +146,8 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores, ...@@ -145,7 +146,8 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
ret = benchmark._preprocess() ret = benchmark._preprocess()
assert (ret) assert (ret)
expect_command = 'run_perftest_loopback 3 1 ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0' expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
'/ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1] command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command) assert (command == expect_command)
...@@ -155,7 +157,7 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores, ...@@ -155,7 +157,7 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
# Positive case - valid raw output. # Positive case - valid raw output.
metric_list = [] metric_list = []
for ib_command in benchmark._args.commands: for ib_command in benchmark._args.commands:
metric = 'ib_{}_8388608_ib{}_bw'.format(ib_command, str(benchmark._args.ib_index)) metric = 'ib_{}_bw_8388608:{}'.format(ib_command, str(benchmark._args.ib_index))
metric_list.append(metric) metric_list.append(metric)
for metric in metric_list: for metric in metric_list:
assert (metric in benchmark.result) assert (metric in benchmark.result)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment