Unverified commit 620192a2, authored by Yifan Xiong and committed by GitHub
Browse files

Fix issues in ib loopback benchmark (#369)

Fix several issues in ib loopback benchmark:
* use `--report_gbits` and divide by 8 to get GB/s; previous results were
  actually MiB/s / 1000
* use the ib_write_bw binary built in third_party instead of system path
* update the metric names so that different HCA indices share the same metric name
parent 8ef7163a
......@@ -220,11 +220,11 @@ Measure the InfiniBand loopback verbs bandwidth, performed by
#### Metrics
| Name | Unit | Description |
|---------------------------------------------|------------------|--------------------------------------------------------------|
| ib-loopback/ib_write_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. |
| ib-loopback/ib_read_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size. |
| ib-loopback/ib_send_${msg_size}_ib[0-9]_bw | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size. |
| Name | Unit | Description |
|-------------------------------------|------------------|--------------------------------------------------------------|
| ib-loopback/ib_write_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback write bandwidth with given message size. |
| ib-loopback/ib_read_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback read bandwidth with given message size. |
| ib-loopback/ib_send_bw_${msg_size} | bandwidth (GB/s) | InfiniBand loopback send bandwidth with given message size. |
### `nccl-bw` / `rccl-bw`
......
......@@ -161,12 +161,13 @@ def _preprocess(self):
server_core = int(numa_cores[-1])
client_core = int(numa_cores[-2])
command += ' ' + str(server_core) + ' ' + str(client_core)
command += ' ' + self.__support_ib_commands[ib_command]
command += ' ' + os.path.join(self._args.bin_dir, self.__support_ib_commands[ib_command])
command += command_mode + ' -F'
command += ' --iters=' + str(self._args.iters)
command += ' -d ' + network.get_ib_devices()[self._args.ib_index].split(':')[0]
command += ' -p ' + str(network.get_free_port())
command += ' -x ' + str(self._args.gid_index)
command += ' --report_gbits'
self._commands.append(command)
except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_DEVICE_GETTING_FAILURE)
......@@ -197,13 +198,13 @@ def _process_raw_result(self, cmd_idx, raw_output):
metric_set = set()
for line in content:
try:
values = list(filter(None, line.split(' ')))
values = list(filter(None, line.split()))
if len(values) != 5:
continue
# Extract value from the line
size = int(values[0])
avg_bw = float(values[-2]) / 1000
metric = 'ib_{}_{}_ib{}_bw'.format(self._args.commands[cmd_idx], size, str(self._args.ib_index))
avg_bw = float(values[-2]) / 8.0
metric = f'{self.__support_ib_commands[self._args.commands[cmd_idx]]}_{size}:{self._args.ib_index}'
# Filter useless value in client output
if metric not in metric_set:
metric_set.add(metric)
......
......@@ -76,7 +76,8 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
ret = benchmark._preprocess()
assert (ret)
expect_command = 'run_perftest_loopback 3 1 ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0'
expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
'/ib_write_bw -a -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
......@@ -87,7 +88,7 @@ def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_core
metric_list = []
for ib_command in benchmark._args.commands:
for size in ['8388608', '4194304', '1024', '2']:
metric = 'ib_{}_{}_ib{}_bw'.format(ib_command, size, str(benchmark._args.ib_index))
metric = 'ib_{}_bw_{}:{}'.format(ib_command, size, str(benchmark._args.ib_index))
metric_list.append(metric)
for metric in metric_list:
assert (metric in benchmark.result)
......@@ -145,7 +146,8 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
ret = benchmark._preprocess()
assert (ret)
expect_command = 'run_perftest_loopback 3 1 ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0'
expect_command = 'run_perftest_loopback 3 1 ' + benchmark._args.bin_dir + \
'/ib_write_bw -s 8388608 -F --iters=2000 -d mlx5_0 -p 10000 -x 0 --report_gbits'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
......@@ -155,7 +157,7 @@ def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores,
# Positive case - valid raw output.
metric_list = []
for ib_command in benchmark._args.commands:
metric = 'ib_{}_8388608_ib{}_bw'.format(ib_command, str(benchmark._args.ib_index))
metric = 'ib_{}_bw_8388608:{}'.format(ib_command, str(benchmark._args.ib_index))
metric_list.append(metric)
for metric in metric_list:
assert (metric in benchmark.result)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment