Unverified Commit 10012a0a authored by Ziyue Yang's avatar Ziyue Yang Committed by GitHub
Browse files

Docs - Add benchmark metrics for cpu-memory-bw-latency (#264)

**Description**
Add benchmark metrics for cpu-memory-bw-latency.
parent b6781968
......@@ -108,6 +108,25 @@ Inference performance of the torchvision models using ONNXRuntime. Currently the
## Communication Benchmarks
### `cpu-memory-bw-latency`
#### Introduction
Measure the memory copy bandwidth and latency across different CPU NUMA nodes,
performed by [Intel MLC Tool](https://www.intel.com/content/www/us/en/developer/articles/tool/intelr-memory-latency-checker.html).
#### Metrics
| Name | Unit | Description |
|-------------------------------------------------------------------------|------------------|---------------------------------------------------------------------|
| cpu-memory-bw-latency/mem\_bandwidth\_matrix\_numa\_[0-9]+\_[0-9]+\_bw | bandwidth (MB/s) | Former NUMA to latter NUMA memory bandwidth. |
| cpu-memory-bw-latency/mem\_latency\_matrix\_numa\_[0-9]+\_[0-9]+\_lat | time (ns) | Former NUMA to latter NUMA memory latency. |
| cpu-memory-bw-latency/mem\_max\_bandwidth\_all\_reads\_bw | bandwidth (MB/s) | Whole-CPU maximum memory bandwidth, full read. |
| cpu-memory-bw-latency/mem\_max\_bandwidth\_3\_1\_reads-writes\_bw | bandwidth (MB/s) | Whole-CPU maximum memory bandwidth, read : write = 3 : 1. |
| cpu-memory-bw-latency/mem\_max\_bandwidth\_2\_1\_reads-writes\_bw | bandwidth (MB/s) | Whole-CPU maximum memory bandwidth, read : write = 2 : 1. |
| cpu-memory-bw-latency/mem\_max\_bandwidth\_1\_1\_reads-writes\_bw | bandwidth (MB/s) | Whole-CPU maximum memory bandwidth, read : write = 1 : 1. |
| cpu-memory-bw-latency/mem\_max\_bandwidth\_stream-triad\_like\_bw | bandwidth (MB/s) | Whole-CPU maximum memory bandwidth, with stream-triad like pattern. |
### `mem-bw`
#### Introduction
......
......@@ -90,13 +90,13 @@ def _process_raw_result(self, cmd_idx, raw_output):
return False
mlc_test = mlc_test.split(';')[0]
if 'max_bandwidth' in mlc_test:
measure = 'BW'
measure = 'bw'
out_table = self._parse_max_bw(raw_output)
elif 'bandwidth_matrix' in mlc_test:
measure = 'BW'
measure = 'bw'
out_table = self._parse_bw_latency(raw_output)
elif 'latency_matrix' in mlc_test:
measure = 'Latency'
measure = 'lat'
out_table = self._parse_bw_latency(raw_output)
else:
logger.error('Invalid option {} to run the {} command'.format(mlc_test, self._commands[cmd_idx]))
......@@ -112,9 +112,9 @@ def _process_raw_result(self, cmd_idx, raw_output):
for key in out_table.keys():
for index in range(len(out_table[key])):
if 'max_bandwidth' in mlc_test:
metric = 'Mem_{}_{}_{}'.format(mlc_test, key, measure)
metric = 'mem_{}_{}_{}'.format(mlc_test, key, measure).lower()
else:
metric = 'Mem_{}_{}_{}_{}'.format(mlc_test, key, str(index), measure)
metric = 'mem_{}_{}_{}_{}'.format(mlc_test, key, str(index), measure).lower()
self._result.add_result(metric, float(out_table[key][index]))
return True
......
......@@ -94,10 +94,10 @@ def test_cpu_mem_bw_latency_benchmark_result_parsing(self):
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_0' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_0'])
assert ([82542.2] == benchmark.result['Mem_bandwidth_matrix_numa_0_0_BW'])
assert ([76679.9] == benchmark.result['Mem_bandwidth_matrix_numa_0_1_BW'])
assert ([76536.0] == benchmark.result['Mem_bandwidth_matrix_numa_1_0_BW'])
assert ([82986.5] == benchmark.result['Mem_bandwidth_matrix_numa_1_1_BW'])
assert ([82542.2] == benchmark.result['mem_bandwidth_matrix_numa_0_0_bw'])
assert ([76679.9] == benchmark.result['mem_bandwidth_matrix_numa_0_1_bw'])
assert ([76536.0] == benchmark.result['mem_bandwidth_matrix_numa_1_0_bw'])
assert ([82986.5] == benchmark.result['mem_bandwidth_matrix_numa_1_1_bw'])
# Positive case - valid latency matrix output.
test_raw_output = """
......@@ -118,10 +118,10 @@ def test_cpu_mem_bw_latency_benchmark_result_parsing(self):
assert ('raw_output_1' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_1'])
assert ([87.0] == benchmark.result['Mem_latency_matrix_numa_0_0_Latency'])
assert ([101.0] == benchmark.result['Mem_latency_matrix_numa_0_1_Latency'])
assert ([101.9] == benchmark.result['Mem_latency_matrix_numa_1_0_Latency'])
assert ([86.9] == benchmark.result['Mem_latency_matrix_numa_1_1_Latency'])
assert ([87.0] == benchmark.result['mem_latency_matrix_numa_0_0_lat'])
assert ([101.0] == benchmark.result['mem_latency_matrix_numa_0_1_lat'])
assert ([101.9] == benchmark.result['mem_latency_matrix_numa_1_0_lat'])
assert ([86.9] == benchmark.result['mem_latency_matrix_numa_1_1_lat'])
# Positive case - valid max bandwidth output.
test_raw_output = """
......@@ -148,11 +148,11 @@ def test_cpu_mem_bw_latency_benchmark_result_parsing(self):
assert (benchmark.return_code == ReturnCode.SUCCESS)
assert ('raw_output_2' in benchmark.raw_data)
assert ([test_raw_output] == benchmark.raw_data['raw_output_2'])
assert ([165400.60] == benchmark.result['Mem_max_bandwidth_ALL_Reads_BW'])
assert ([154975.19] == benchmark.result['Mem_max_bandwidth_3_1_Reads-Writes_BW'])
assert ([158433.32] == benchmark.result['Mem_max_bandwidth_2_1_Reads-Writes_BW'])
assert ([157352.05] == benchmark.result['Mem_max_bandwidth_1_1_Reads-Writes_BW'])
assert ([157878.32] == benchmark.result['Mem_max_bandwidth_Stream-triad_like_BW'])
assert ([165400.60] == benchmark.result['mem_max_bandwidth_all_reads_bw'])
assert ([154975.19] == benchmark.result['mem_max_bandwidth_3_1_reads-writes_bw'])
assert ([158433.32] == benchmark.result['mem_max_bandwidth_2_1_reads-writes_bw'])
assert ([157352.05] == benchmark.result['mem_max_bandwidth_1_1_reads-writes_bw'])
assert ([157878.32] == benchmark.result['mem_max_bandwidth_stream-triad_like_bw'])
# Negative case - invalid raw output.
assert (benchmark._process_raw_result(0, 'Invalid raw output') is False)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment