Unverified commit ff563b66, authored by Yifan Xiong, committed by GitHub
Browse files

Release - SuperBench v0.4.0 (#278)



__Description__

Cherry-pick bug fixes from v0.4.0 to main.

__Major Revisions__

* Bug - Fix issues for Ansible and benchmarks (#267)
* Tests - Refine test cases for microbenchmark (#268)
* Bug - Build openmpi with ucx support in rocm dockerfiles (#269)
* Benchmarks: Fix Bug - Fix fio build issue (#272)
* Docs - Unify metric and add doc for cublas and cudnn functions (#271)
* Monitor: Revision - Add 'monitor/' prefix to monitor metrics in result summary (#274)
* Bug - Fix bug of detecting if gpu_index is none (#275)
* Bug - Fix bugs in data diagnosis (#273)
* Bug - Fix issue that the root mpi rank may not be the first in the hostfile (#270)
* Benchmarks: Configuration - Update inference and network benchmarks in configs (#276)
* Docs - Upgrade version and release note (#277)
Co-authored-by: Yuting Jiang <v-yutjiang@microsoft.com>
parent 682ed06a
# SuperBench Config
version: v0.3
version: v0.4
superbench:
enable: null
monitor:
enable: false
enable: true
sample_duration: 1
sample_interval: 10
var:
......@@ -107,9 +107,56 @@ superbench:
<<: *default_pytorch_mode
computation-communication-overlap:
<<: *default_pytorch_mode
ib-traffic:
enable: false
modes:
- name: mpi
proc_num: 1
gpcnet-network-test:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
pml: ucx
btl: ^uct
btl_tcp_if_include: eth0
env:
UCX_NET_DEVICES: mlx5_0:1
gpcnet-network-load-test:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
pml: ucx
btl: ^uct
btl_tcp_if_include: eth0
env:
UCX_NET_DEVICES: mlx5_0:1
tcp-connectivity:
enable: false
modes:
- name: local
parallel: no
parameters:
port: 22
ort-inference:
<<: *default_local_mode
enable: false
tensorrt-inference:
<<: *default_local_mode
parameters:
pytorch_models:
- resnet50
- resnet101
- resnet152
- densenet169
- densenet201
- bert-base
- bert-large
seq_length: 224
batch_size: 32
precision: int8
gpt_models:
<<: *default_pytorch_mode
models:
......
......@@ -3,6 +3,7 @@
"""SuperBench Ansible Client."""
import tempfile
from pathlib import Path
import ansible_runner
......@@ -22,10 +23,10 @@ def __init__(self, config):
"""
self._playbook_path = Path(__file__).parent / 'playbooks'
self._config = {
'private_data_dir': None,
'host_pattern': 'localhost',
'cmdline': '--forks 128',
}
self._head_host = None
if config:
inventory_file = getattr(config, 'host_file', None)
inventory_list = getattr(config, 'host_list', None)
......@@ -34,9 +35,10 @@ def __init__(self, config):
if inventory_file or inventory_list:
self._config['host_pattern'] = 'all'
inventory = InventoryManager(loader=DataLoader(), sources=inventory_file or f'{inventory_list},')
host_list = inventory.get_groups_dict()['all']
host_list = inventory.get_hosts(pattern='all', order='sorted')
if len(host_list) > 0:
self._config['cmdline'] = '--forks {}'.format(len(host_list))
self._head_host = host_list[0].get_name()
if inventory_list in ['localhost', '127.0.0.1']:
self._config['cmdline'] += ' --connection local'
self._config['cmdline'] += ' --inventory {}'.format(inventory_file or f'{inventory_list},')
......@@ -69,12 +71,13 @@ def run(self, ansible_config, sudo=False): # pragma: no cover
if sudo:
logger.info('Run as sudo ...')
ansible_config['cmdline'] += ' --become'
r = ansible_runner.run(**ansible_config)
with tempfile.TemporaryDirectory(prefix='ansible') as tmpdir:
r = ansible_runner.run(private_data_dir=tmpdir, **ansible_config)
logger.debug(r.stats)
if r.rc == 0:
logger.info('Run succeed, return code {}.'.format(r.rc))
else:
logger.warning('Run failed, return code {}.'.format(r.rc))
logger.debug(r.stats)
return r.rc
def update_mpi_config(self, ansible_config):
......@@ -86,7 +89,10 @@ def update_mpi_config(self, ansible_config):
Returns:
dict: Updated Ansible config dict.
"""
if not self._head_host:
ansible_config['host_pattern'] += '[0]'
else:
ansible_config['host_pattern'] = self._head_host
return ansible_config
def get_shell_config(self, cmd):
......
- name: Fetch Results
hosts: all
gather_facts: true
vars:
workspace: '{{ ansible_user_dir }}/sb-workspace'
tasks:
- name: Synchronize Output Directory
ansible.posix.synchronize:
mode: pull
src: '{{ sb_output_dir }}/'
src: '{{ sb_output_dir if sb_output_dir.startswith("/") else workspace + "/" + sb_output_dir }}/'
dest: '{{ absolute_output_dir }}/nodes/{{ ansible_hostname }}'
rsync_opts:
- --exclude=nodes
......@@ -39,7 +39,7 @@ def __init__(self, sb_config, docker_config, ansible_config, sb_output_dir):
self._ansible_client = AnsibleClient(ansible_config)
self.__set_logger('sb-run.log')
logger.info('Runner uses config: %s.', pformat(self._sb_config))
logger.info('Runner uses config: %s.', pformat(OmegaConf.to_container(self._sb_config, resolve=True)))
logger.info('Runner writes to: %s.', str(self._output_path))
self._sb_benchmarks = self._sb_config.superbench.benchmarks
......@@ -336,7 +336,8 @@ def __merge_monitor_metrics(self, node_path):
for pattern, reduce_type in MonitorRecord.reduce_ops.items():
if pattern in metric:
reduce_func = Reducer.get_reduce_func(reduce_type)
metrics_summary[metric] = reduce_func(values)
metric_name = 'monitor/{}'.format(metric)
metrics_summary[metric_name] = reduce_func(values)
continue
return metrics_summary
......
......@@ -18,9 +18,10 @@ class TestDataDiagnosis(unittest.TestCase):
"""Test for DataDiagnosis class."""
def setUp(self):
"""Method called to prepare the test fixture."""
self.output_excel_file = str(Path(__file__).parent.resolve()) + '/diagnosis_summary.xlsx'
self.test_rule_file_fake = str(Path(__file__).parent.resolve()) + '/test_rules_fake.yaml'
self.output_json_file = str(Path(__file__).parent.resolve()) + '/diagnosis_summary.jsonl'
self.parent_path = Path(__file__).parent
self.output_excel_file = str(self.parent_path / 'diagnosis_summary.xlsx')
self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
self.output_json_file = str(self.parent_path / 'diagnosis_summary.jsonl')
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
......@@ -33,21 +34,31 @@ def test_data_diagnosis(self):
"""Test for rule-based data diagnosis."""
# Test - read_raw_data and get_metrics_from_raw_data
# Positive case
test_raw_data = str(Path(__file__).parent.resolve()) + '/test_results.jsonl'
test_rule_file = str(Path(__file__).parent.resolve()) + '/test_rules.yaml'
test_baseline_file = str(Path(__file__).parent.resolve()) + '/test_baseline.json'
test_raw_data = str(self.parent_path / 'test_results.jsonl')
test_rule_file = str(self.parent_path / 'test_rules.yaml')
test_baseline_file = str(self.parent_path / 'test_baseline.json')
diag1 = DataDiagnosis()
diag1._raw_data_df = file_handler.read_raw_data(test_raw_data)
diag1._metrics = diag1._get_metrics_by_benchmarks(list(diag1._raw_data_df))
assert (len(diag1._raw_data_df) == 3)
# Negative case
test_raw_data_fake = str(Path(__file__).parent.resolve()) + '/test_results_fake.jsonl'
test_rule_file_fake = str(Path(__file__).parent.resolve()) + '/test_rules_fake.yaml'
test_raw_data_fake = str(self.parent_path / 'test_results_fake.jsonl')
test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
diag2 = DataDiagnosis()
diag2._raw_data_df = file_handler.read_raw_data(test_raw_data_fake)
diag2._metrics = diag2._get_metrics_by_benchmarks(list(diag2._raw_data_df))
assert (len(diag2._raw_data_df) == 0)
assert (len(diag2._metrics) == 0)
metric_list = [
'gpu_temperature', 'gpu_power_limit', 'gemm-flops/FP64',
'bert_models/pytorch-bert-base/steptime_train_float32'
]
self.assertDictEqual(
diag2._get_metrics_by_benchmarks(metric_list), {
'gemm-flops': {'gemm-flops/FP64'},
'bert_models': {'bert_models/pytorch-bert-base/steptime_train_float32'}
}
)
# Test - read rules
rules = file_handler.read_rules(test_rule_file_fake)
assert (not rules)
......@@ -176,3 +187,27 @@ def test_data_diagnosis(self):
assert ('Category' in line)
assert ('Defective Details' in line)
assert ('Index' in line)
def test_data_diagnosis_run(self):
    """End-to-end test for DataDiagnosis.run() in both 'excel' and 'json' output formats.

    Runs diagnosis over the checked-in sample raw data/rules/baseline and compares the
    produced summary against the expected files under ../data.
    """
    test_raw_data = str(self.parent_path / 'test_results.jsonl')
    test_rule_file = str(self.parent_path / 'test_rules.yaml')
    test_baseline_file = str(self.parent_path / 'test_baseline.json')
    # Test - output in excel
    DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'excel')
    excel_file = pd.ExcelFile(self.output_excel_file, engine='openpyxl')
    data_sheet_name = 'Not Accept'
    data_not_accept_read_from_excel = excel_file.parse(data_sheet_name)
    expect_result_file = pd.ExcelFile(str(self.parent_path / '../data/diagnosis_summary.xlsx'), engine='openpyxl')
    expect_result = expect_result_file.parse(data_sheet_name)
    # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0; use the public pd.testing API.
    pd.testing.assert_frame_equal(data_not_accept_read_from_excel, expect_result)
    # Test - output in json
    DataDiagnosis().run(test_raw_data, test_rule_file, test_baseline_file, str(self.parent_path), 'json')
    assert (Path(self.output_json_file).is_file())
    with Path(self.output_json_file).open() as f:
        data_not_accept_read_from_json = f.read()
    expect_result_file = self.parent_path / '../data/diagnosis_summary.jsonl'
    with Path(expect_result_file).open() as f:
        expect_result = f.read()
    assert (data_not_accept_read_from_json == expect_result)
# SuperBench rules
version: v0.3
version: v0.4
superbench:
rules:
rule0:
......
......@@ -14,4 +14,5 @@
vars:
ssh_port: 12345
output_dir: /tmp/test_ansible
docker_image: superbench/superbench
# use a mock superbench image (requires `sb` binary inside)
docker_image: superbench/superbench:v0.3.0-cuda11.1.1
......@@ -3,29 +3,20 @@
"""Tests for cpu-memory-bw-latency benchmark."""
from pathlib import Path
import os
import unittest
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class CpuMemBwLatencyBenchmarkTest(unittest.TestCase):
class CpuMemBwLatencyBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for cpu-memory-bw-latency benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '')
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = binary_path / 'mlc'
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
os.environ['SB_MICRO_PATH'] = self.__curr_micro_path
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
# Replaces the old per-test setUp/tearDown: set up mock env (SB_MICRO_PATH) and a fake
# 'bin/mlc' binary once for the whole class so command generation can run without the
# real Intel MLC tool.
# NOTE(review): the helpers appear to be defined as instance methods on BenchmarkTestCase,
# hence cls is passed explicitly — confirm against the helper class.
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/mlc'])
def test_cpu_mem_bw_latency_benchmark_empty_param(self):
"""Test cpu-memory-bw-latency benchmark command generation with empty parameter."""
......
......@@ -3,29 +3,22 @@
"""Tests for gemm-flops benchmark."""
import os
import unittest
from pathlib import Path
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.common.utils import device_manager as dm
from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType
class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
class CudaGemmFlopsBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for CudaGemmFlopsBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(binary_path).mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(os.path.join(binary_path, 'cutlass_profiler'))
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
# Replaces the old per-test setUp/tearDown: set up mock env (SB_MICRO_PATH) and a fake
# 'bin/cutlass_profiler' binary once per class so command generation works without CUTLASS.
# NOTE(review): helpers are called with cls passed explicitly — presumably instance-style
# methods on BenchmarkTestCase; verify against the helper class.
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/cutlass_profiler'])
@decorator.cuda_test
def test_flops_performance_cuda(self):
......
......@@ -3,28 +3,22 @@
"""Tests for disk-performance benchmark."""
from pathlib import Path
from unittest import mock
import os
import unittest
from unittest import mock
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class DiskBenchmarkTest(unittest.TestCase):
class DiskBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for disk-performance benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = binary_path / 'fio'
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
# Replaces the old per-test setUp/tearDown: set up mock env (SB_MICRO_PATH) and a fake
# 'bin/fio' binary once per class so command generation works without fio installed.
# NOTE(review): helpers are called with cls passed explicitly — presumably instance-style
# methods on BenchmarkTestCase; verify against the helper class.
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/fio'])
def test_disk_performance_empty_param(self):
"""Test disk-performance benchmark command generation with empty parameter."""
......@@ -178,7 +172,8 @@ def test_disk_performance_benchmark_enabled(self, mock_is_block_device):
assert ('--rwmixread=%d' % default_rwmixread in benchmark._commands[command_idx])
command_idx += 1
def test_disk_performance_result_parsing(self):
@decorator.load_data('tests/data/disk_performance.log')
def test_disk_performance_result_parsing(self, test_raw_output):
"""Test disk-performance benchmark result parsing."""
benchmark_name = 'disk-benchmark'
(benchmark_class,
......@@ -193,317 +188,6 @@ def test_disk_performance_result_parsing(self):
assert (benchmark.type == BenchmarkType.MICRO)
# Positive case - valid raw output.
test_raw_output = """
{
"fio version" : "fio-3.16",
"timestamp" : 1626763278,
"timestamp_ms" : 1626763278577,
"time" : "Tue Jul 20 06:41:18 2021",
"global options" : {
"filename" : "/dev/nvme0n1",
"ramp_time" : "10s",
"runtime" : "30s",
"iodepth" : "64",
"numjobs" : "4",
"randrepeat" : "1",
"thread" : "1",
"ioengine" : "libaio",
"direct" : "1",
"norandommap" : "1",
"lat_percentiles" : "1",
"group_reporting" : "1"
},
"jobs" : [
{
"jobname" : "rand_read_write",
"groupid" : 0,
"error" : 0,
"eta" : 0,
"elapsed" : 41,
"job options" : {
"name" : "rand_read",
"rw" : "randrw",
"bs" : "4096",
"time_based" : "1"
},
"read" : {
"io_bytes" : 10463010816,
"io_kbytes" : 10217784,
"bw_bytes" : 348743777,
"bw" : 340570,
"iops" : 85138.890741,
"runtime" : 30002,
"total_ios" : 2554337,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1332,
"max" : 48691,
"mean" : 2032.588341,
"stddev" : 864.921965
},
"clat_ns" : {
"min" : 278533,
"max" : 10175655,
"mean" : 1444476.063469,
"stddev" : 300748.583131
},
"lat_ns" : {
"min" : 280646,
"max" : 10177629,
"mean" : 1446562.147113,
"stddev" : 300723.879349,
"percentile" : {
"1.000000" : 872448,
"5.000000" : 1036288,
"10.000000" : 1122304,
"20.000000" : 1220608,
"30.000000" : 1286144,
"40.000000" : 1351680,
"50.000000" : 1417216,
"60.000000" : 1482752,
"70.000000" : 1564672,
"80.000000" : 1662976,
"90.000000" : 1810432,
"95.000000" : 1941504,
"99.000000" : 2244608,
"99.500000" : 2408448,
"99.900000" : 3620864,
"99.950000" : 4358144,
"99.990000" : 6062080
}
},
"bw_min" : 291288,
"bw_max" : 380288,
"bw_agg" : 99.999134,
"bw_mean" : 340567.050000,
"bw_dev" : 6222.338382,
"bw_samples" : 240,
"iops_min" : 72822,
"iops_max" : 95072,
"iops_mean" : 85141.733333,
"iops_stddev" : 1555.582888,
"iops_samples" : 240
},
"write" : {
"io_bytes" : 10454208512,
"io_kbytes" : 10209188,
"bw_bytes" : 348450387,
"bw" : 340283,
"iops" : 85066.128925,
"runtime" : 30002,
"total_ios" : 2552154,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1383,
"max" : 315361,
"mean" : 2182.824623,
"stddev" : 919.625590
},
"clat_ns" : {
"min" : 433904,
"max" : 6300941,
"mean" : 1558511.433458,
"stddev" : 207734.850159
},
"lat_ns" : {
"min" : 441909,
"max" : 6302845,
"mean" : 1560749.444938,
"stddev" : 207695.144244,
"percentile" : {
"1.000000" : 1155072,
"5.000000" : 1269760,
"10.000000" : 1318912,
"20.000000" : 1384448,
"30.000000" : 1449984,
"40.000000" : 1499136,
"50.000000" : 1531904,
"60.000000" : 1597440,
"70.000000" : 1646592,
"80.000000" : 1728512,
"90.000000" : 1826816,
"95.000000" : 1908736,
"99.000000" : 2072576,
"99.500000" : 2179072,
"99.900000" : 2605056,
"99.950000" : 3031040,
"99.990000" : 4358144
}
},
"bw_min" : 288464,
"bw_max" : 380080,
"bw_agg" : 99.998134,
"bw_mean" : 340276.650000,
"bw_dev" : 6293.894521,
"bw_samples" : 240,
"iops_min" : 72116,
"iops_max" : 95020,
"iops_mean" : 85069.133333,
"iops_stddev" : 1573.475038,
"iops_samples" : 240
},
"trim" : {
"io_bytes" : 0,
"io_kbytes" : 0,
"bw_bytes" : 0,
"bw" : 0,
"iops" : 0.000000,
"runtime" : 0,
"total_ios" : 0,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"clat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000,
"percentile" : {
"1.000000" : 0,
"5.000000" : 0,
"10.000000" : 0,
"20.000000" : 0,
"30.000000" : 0,
"40.000000" : 0,
"50.000000" : 0,
"60.000000" : 0,
"70.000000" : 0,
"80.000000" : 0,
"90.000000" : 0,
"95.000000" : 0,
"99.000000" : 0,
"99.500000" : 0,
"99.900000" : 0,
"99.950000" : 0,
"99.990000" : 0
}
},
"bw_min" : 0,
"bw_max" : 0,
"bw_agg" : 0.000000,
"bw_mean" : 0.000000,
"bw_dev" : 0.000000,
"bw_samples" : 0,
"iops_min" : 0,
"iops_max" : 0,
"iops_mean" : 0.000000,
"iops_stddev" : 0.000000,
"iops_samples" : 0
},
"sync" : {
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"total_ios" : 0
},
"job_runtime" : 120004,
"usr_cpu" : 4.833172,
"sys_cpu" : 20.800973,
"ctx" : 3542118,
"majf" : 0,
"minf" : 1263,
"iodepth_level" : {
"1" : 0.000000,
"2" : 0.000000,
"4" : 0.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
">=64" : 100.000000
},
"iodepth_submit" : {
"0" : 0.000000,
"4" : 100.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.000000,
">=64" : 0.000000
},
"iodepth_complete" : {
"0" : 0.000000,
"4" : 99.999922,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.100000,
">=64" : 0.000000
},
"latency_ns" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000
},
"latency_us" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.010000,
"750" : 0.070126,
"1000" : 1.756079
},
"latency_ms" : {
"2" : 95.414131,
"4" : 2.722457,
"10" : 0.040830,
"20" : 0.010000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000,
"2000" : 0.000000,
">=2000" : 0.000000
},
"latency_depth" : 64,
"latency_target" : 0,
"latency_percentile" : 100.000000,
"latency_window" : 0
}
],
"disk_util" : [
{
"name" : "nvme0n1",
"read_ios" : 3004914,
"write_ios" : 3003760,
"read_merges" : 0,
"write_merges" : 0,
"read_ticks" : 4269143,
"write_ticks" : 4598453,
"in_queue" : 11104,
"util" : 99.840351
}
]
}
"""
jobname_prefix = 'nvme0n1_rand_read_write'
assert (benchmark._process_raw_result(0, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
......
......@@ -3,66 +3,27 @@
"""Tests for GPCNet benchmark."""
import os
import numbers
import unittest
from pathlib import Path
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType
class GPCNetBenchmarkTest(unittest.TestCase): # noqa: E501
class GPCNetBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for GPCNetBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(binary_path).mkdir(parents=True, exist_ok=True)
self.__binary_files = []
for bin_name in ['network_test', 'network_load_test']:
self.__binary_files.append(Path(binary_path, bin_name))
Path(binary_path, bin_name).touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
for bin_file in self.__binary_files:
bin_file.unlink()
def test_gpcnet_network_test(self):
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
# Replaces the old per-test setUp/tearDown: set up mock env (SB_MICRO_PATH) and fake
# GPCNet binaries ('network_test' and 'network_load_test') once per class so command
# generation works without the real tools.
# NOTE(review): helpers are called with cls passed explicitly — presumably instance-style
# methods on BenchmarkTestCase; verify against the helper class.
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/network_test', 'bin/network_load_test'])
@decorator.load_data('tests/data/gpcnet_network_test.log')
@decorator.load_data('tests/data/gpcnet_network_test_error.log')
def test_gpcnet_network_test(self, raw_output, raw_output_no_execution):
"""Test gpcnet-network-test benchmark."""
raw_output = """# noqa: E501
Network Tests v1.3
Test with 2 MPI ranks (2 nodes)
Legend
RR = random ring communication pattern
Nat = natural ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+
| Name | Avg | 99% | Units |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Get Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Put BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Nat Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| Multiple Alltoall (4096 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
"""
# Check registry.
benchmark_name = 'gpcnet-network-test'
(benchmark_class,
......@@ -78,20 +39,6 @@ def test_gpcnet_network_test(self):
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
raw_output_no_execution = """
ERROR: this application must be run on at least 2 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
"""
assert (benchmark._process_raw_result(0, raw_output_no_execution))
assert (len(benchmark.result) == benchmark.default_metric_count)
......@@ -123,107 +70,10 @@ def test_gpcnet_network_test(self):
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark._bin_name == 'network_test')
def test_gpcnet_network_load(self): # noqa: C901
@decorator.load_data('tests/data/gpcnet_network_load.log')
@decorator.load_data('tests/data/gpcnet_network_load_error.log')
def test_gpcnet_network_load(self, raw_output, raw_output_no_execution):
"""Test gpcnet-network-load-test benchmark."""
raw_output = """# noqa: E501
NetworkLoad Tests v1.3
Test with 10 MPI ranks (10 nodes)
2 nodes running Network Tests
8 nodes running Congestion Tests (min 100 nodes per congestor)
Legend
RR = random ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Congestion Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( RR Two-sided Lat Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( Multiple Allreduce Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests - Key Results |
+---------------------------------+--------------------------------------------+
| Name | Congestion Impact Factor |
+---------------------------------+----------------------+---------------------+
| | Avg | 99% |
+---------------------------------+----------------------+---------------------+
| RR Two-sided Lat (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| RR Two-sided BW+Sync (131072 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| Multiple Allreduce (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
"""
# Check registry.
benchmark_name = 'gpcnet-network-load-test'
(benchmark_class,
......@@ -240,20 +90,6 @@ def test_gpcnet_network_load(self): # noqa: C901
assert (command == expect_command)
# Check function process_raw_data.
raw_output_no_execution = """
ERROR: this application must be run on at least 10 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
"""
assert (benchmark._process_raw_result(0, raw_output_no_execution))
assert (len(benchmark.result) == benchmark.default_metric_count)
# Positive case - valid raw output.
......
......@@ -3,29 +3,22 @@
"""Tests for gpu-copy-bw benchmark."""
from pathlib import Path
import numbers
import os
import unittest
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class GpuCopyBwBenchmarkTest(unittest.TestCase):
class GpuCopyBwBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for gpu-copy-bw benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = binary_path / 'gpu_copy'
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/gpu_copy'])
def _test_gpu_copy_bw_performance_command_generation(self, platform):
"""Test gpu-copy benchmark command generation."""
......
......@@ -6,113 +6,42 @@
import os
import numbers
import unittest
from pathlib import Path
from unittest import mock
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode
from superbench.common.utils import network
from superbench.benchmarks.micro_benchmarks import ib_loopback_performance
class IBLoopbackBenchmarkTest(unittest.TestCase):
class IBLoopbackBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for IBLoopbackBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
if (len(network.get_ib_devices()) < 1):
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(binary_path, 'run_perftest_loopback')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
if (len(network.get_ib_devices()) < 1):
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/run_perftest_loopback'])
def test_ib_loopback_util(self):
"""Test util functions 'get_numa_cores' and 'get_free_port' used in ib-loopback benchmark."""
port = network.get_free_port()
assert (isinstance(port, numbers.Number))
numa_cores = ib_loopback_performance.get_numa_cores(0)
if numa_cores is None:
# in case no NUMA support available on test system
return
assert (len(numa_cores) >= 2)
for i in range(len(numa_cores)):
assert (isinstance(numa_cores[i], numbers.Number))
@decorator.load_data('tests/data/ib_loopback_all_sizes.log')
@mock.patch('superbench.common.utils.network.get_free_port')
@mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores')
@mock.patch('superbench.common.utils.network.get_ib_devices')
def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port):
def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port):
"""Test ib-loopback benchmark for all sizes."""
raw_output = """
************************************
* Waiting for client to connect... *
************************************
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
ibv_wr* API : ON
TX depth : 128
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
ibv_wr* API : ON
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000
local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000
remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000
remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
2 2000 5.32 5.30 2.778732
4 2000 10.65 10.64 2.788833
8 2000 21.30 21.27 2.787609
16 2000 42.60 42.55 2.788268
32 2000 84.90 82.82 2.713896
64 2000 173.55 171.66 2.812504
128 2000 362.27 353.83 2.898535
256 2000 687.82 679.37 2.782698
512 2000 1337.12 1311.59 2.686135
1024 2000 2674.25 2649.39 2.712980
2048 2000 5248.56 5118.18 2.620509
4096 2000 10034.02 9948.41 2.546793
8192 2000 18620.51 12782.56 1.636168
16384 2000 23115.27 16782.50 1.074080
32768 2000 22927.94 18586.03 0.594753
65536 2000 23330.56 21167.79 0.338685
131072 2000 22750.35 21443.14 0.171545
262144 2000 22673.63 22411.35 0.089645
524288 2000 22679.02 22678.86 0.045358
1048576 2000 22817.06 22816.86 0.022817
2097152 2000 22919.37 22919.27 0.011460
4194304 2000 23277.93 23277.91 0.005819
8388608 2000 23240.68 23240.68 0.002905
---------------------------------------------------------------------------------------
8388608 2000 23240.68 23240.68 0.002905
---------------------------------------------------------------------------------------
"""
# Test without ib devices
# Check registry.
benchmark_name = 'ib-loopback'
......@@ -179,56 +108,12 @@ def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port
assert (benchmark._args.iters == 2000)
assert (benchmark._args.commands == ['write'])
@decorator.load_data('tests/data/ib_loopback_8M_size.log')
@mock.patch('superbench.common.utils.network.get_free_port')
@mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores')
@mock.patch('superbench.common.utils.network.get_ib_devices')
def test_ib_loopback_8M_size(self, mock_ib_devices, mock_numa_cores, mock_port):
def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port):
"""Test ib-loopback benchmark for 8M size."""
raw_output = """
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
TX depth : 128
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000
remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
8388608 20000 24056.74 24056.72 0.003007
************************************
* Waiting for client to connect... *
************************************
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000
remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
8388608 20000 24056.74 24056.72 0.003007
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
"""
# Test without ib devices
# Check registry.
benchmark_name = 'ib-loopback'
......
......@@ -10,26 +10,26 @@
from unittest import mock
from collections import defaultdict
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode
class IBBenchmarkTest(unittest.TestCase):
class IBBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for IBBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(binary_path, 'ib_validation')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/ib_validation'])
@classmethod
def tearDownClass(cls):
"""Hook method for deconstructing the class fixture after running all tests in the class."""
p = Path('hostfile')
if p.is_file():
p.unlink()
super().tearDownClass()
def test_generate_config(self): # noqa: C901
"""Test util functions ."""
......@@ -117,8 +117,9 @@ def read_config(filename):
Path(test_config_file).unlink()
@mock.patch('superbench.common.devices.GPU.vendor', new_callable=mock.PropertyMock)
@mock.patch('superbench.common.utils.network.get_ib_devices')
def test_ib_traffic_performance(self, mock_ib_devices):
def test_ib_traffic_performance(self, mock_ib_devices, mock_gpu):
"""Test ib-traffic benchmark."""
# Test without ib devices
# Check registry.
......@@ -168,6 +169,22 @@ def test_ib_traffic_performance(self, mock_ib_devices):
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
parameters = '--ib_index 0 --iters 2000 --pattern one-to-one --hostfile hostfile --gpu_index 0'
mock_gpu.return_value = 'nvidia'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
ret = benchmark._preprocess()
expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \
'--iters=2000 -d mlx5_0 -a --use_cuda=0" --input_config ' + os.getcwd() + '/config.txt'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
mock_gpu.return_value = 'amd'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
ret = benchmark._preprocess()
expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \
'--iters=2000 -d mlx5_0 -a --use_rocm=0" --input_config ' + os.getcwd() + '/config.txt'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
# Custom config
config = ['0,1', '1,0;0,1', '0,1;1,0', '1,0;0,1']
with open('test_config.txt', 'w') as f:
......
......@@ -3,27 +3,20 @@
"""Tests for gemm-flops benchmark."""
import os
import unittest
from pathlib import Path
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType
class RocmGemmFlopsTest(unittest.TestCase):
class RocmGemmFlopsTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for RocmGemmFlops benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(binary_path).mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(os.path.join(binary_path, 'rocblas-bench'))
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/rocblas-bench'])
def test_rocm_flops_performance(self):
"""Test gemm-flops benchmark."""
......
......@@ -4,29 +4,25 @@
"""Tests for mem-bw benchmark."""
import numbers
from pathlib import Path
import os
import unittest
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class RocmMemBwTest(unittest.TestCase):
class RocmMemBwTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for rocm mem-bw benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(os.getenv('SB_MICRO_PATH'), 'bin').mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(binary_path, 'hipBusBandwidth')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
def test_rocm_memory_bw_performance(self):
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/hipBusBandwidth'])
@decorator.load_data('tests/data/rocm_memory_h2d_bw.log')
@decorator.load_data('tests/data/rocm_memory_d2h_bw.log')
def test_rocm_memory_bw_performance(self, raw_output_h2d, raw_output_d2h):
"""Test rocm mem-bw benchmark."""
benchmark_name = 'mem-bw'
(benchmark_class,
......@@ -51,114 +47,7 @@ def test_rocm_memory_bw_performance(self):
assert (commnad == expected_command[i])
# Check results and metrics.
raw_output = {}
raw_output[0] = """
Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned
test atts units median mean stddev min max
H2D_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned 1kB GB/sec 0.0414 0.0411 0.0017 0.0189 0.0434
H2D_Bandwidth_pinned 2kB GB/sec 0.0828 0.0824 0.0018 0.0683 0.0862
H2D_Bandwidth_pinned 4kB GB/sec 0.1656 0.1652 0.0032 0.1374 0.1724
H2D_Bandwidth_pinned 8kB GB/sec 0.3268 0.3251 0.0117 0.1880 0.3425
H2D_Bandwidth_pinned 16kB GB/sec 0.6410 0.6365 0.0259 0.3597 0.6757
H2D_Bandwidth_pinned 32kB GB/sec 1.2422 1.2432 0.0278 0.9346 1.2987
H2D_Bandwidth_pinned 64kB GB/sec 2.3968 2.4161 0.1486 0.7242 2.6042
H2D_Bandwidth_pinned 128kB GB/sec 4.6786 4.6339 0.1310 4.1143 4.8162
H2D_Bandwidth_pinned 256kB GB/sec 7.8349 7.8369 0.1150 6.9093 8.0270
H2D_Bandwidth_pinned 512kB GB/sec 11.9963 11.9828 0.1287 11.2158 12.2201
H2D_Bandwidth_pinned 1024kB GB/sec 16.3342 16.3315 0.0956 16.0147 16.5823
H2D_Bandwidth_pinned 2048kB GB/sec 19.9790 19.9770 0.0853 19.7681 20.1635
H2D_Bandwidth_pinned 4096kB GB/sec 22.2706 22.2642 0.0552 22.0644 22.3847
H2D_Bandwidth_pinned 8192kB GB/sec 22.8232 22.7881 0.1669 21.3196 22.8930
H2D_Bandwidth_pinned 16384kB GB/sec 24.1521 24.1411 0.0429 24.0165 24.2162
H2D_Bandwidth_pinned 32768kB GB/sec 24.8695 24.7086 0.7491 20.6288 24.9035
H2D_Bandwidth_pinned 65536kB GB/sec 24.4840 24.0101 2.5769 6.1754 24.5292
H2D_Bandwidth_pinned 131072kB GB/sec 25.0487 24.9593 0.2601 24.1286 25.0711
H2D_Bandwidth_pinned 262144kB GB/sec 25.3280 25.2351 0.1788 24.8746 25.3498
H2D_Bandwidth_pinned 524288kB GB/sec 24.7523 24.6708 0.1586 24.3154 24.7880
H2D_Timepinned +064By ms 0.0245 0.0253 0.0240 0.0232 0.7821
H2D_Timepinned +256By ms 0.0243 0.0244 0.0013 0.0232 0.0546
H2D_Timepinned +512By ms 0.0243 0.0244 0.0014 0.0230 0.0566
H2D_Timepinned 1kB ms 0.0242 0.0244 0.0016 0.0230 0.0530
H2D_Timepinned 2kB ms 0.0242 0.0243 0.0005 0.0232 0.0293
H2D_Timepinned 4kB ms 0.0242 0.0242 0.0005 0.0232 0.0291
H2D_Timepinned 8kB ms 0.0245 0.0247 0.0013 0.0234 0.0426
H2D_Timepinned 16kB ms 0.0250 0.0252 0.0015 0.0237 0.0445
H2D_Timepinned 32kB ms 0.0258 0.0258 0.0006 0.0246 0.0342
H2D_Timepinned 64kB ms 0.0271 0.0272 0.0045 0.0250 0.0898
H2D_Timepinned 128kB ms 0.0280 0.0283 0.0008 0.0272 0.0318
H2D_Timepinned 256kB ms 0.0334 0.0334 0.0005 0.0326 0.0379
H2D_Timepinned 512kB ms 0.0437 0.0437 0.0005 0.0429 0.0467
H2D_Timepinned 1024kB ms 0.0642 0.0642 0.0004 0.0632 0.0654
H2D_Timepinned 2048kB ms 0.1050 0.1050 0.0004 0.1040 0.1061
H2D_Timepinned 4096kB ms 0.1883 0.1884 0.0005 0.1874 0.1901
H2D_Timepinned 8192kB ms 0.3675 0.3681 0.0028 0.3664 0.3934
H2D_Timepinned 16384kB ms 0.6946 0.6950 0.0012 0.6928 0.6986
H2D_Timepinned 32768kB ms 1.3492 1.3595 0.0482 1.3474 1.6266
H2D_Timepinned 65536kB ms 2.7409 2.9163 1.1368 2.7358 10.8670
H2D_Timepinned 131072kB ms 5.3582 5.3780 0.0576 5.3534 5.5626
H2D_Timepinned 262144kB ms 10.5983 10.6379 0.0761 10.5892 10.7915
H2D_Timepinned 524288kB ms 21.6897 21.7622 0.1411 21.6585 22.0794
Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
"""
raw_output[1] = """
Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned
test atts units median mean stddev min max
D2H_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned 1kB GB/sec 0.0428 0.0426 0.0019 0.0114 0.0446
D2H_Bandwidth_pinned 2kB GB/sec 0.0850 0.0844 0.0034 0.0415 0.0893
D2H_Bandwidth_pinned 4kB GB/sec 0.1701 0.1687 0.0084 0.0504 0.1773
D2H_Bandwidth_pinned 8kB GB/sec 0.3378 0.3348 0.0168 0.1085 0.3546
D2H_Bandwidth_pinned 16kB GB/sec 0.6667 0.6606 0.0218 0.5618 0.6897
D2H_Bandwidth_pinned 32kB GB/sec 1.3072 1.2954 0.0663 0.5682 1.3605
D2H_Bandwidth_pinned 64kB GB/sec 2.5550 2.5339 0.0955 2.1382 2.6904
D2H_Bandwidth_pinned 128kB GB/sec 4.8162 4.7807 0.2331 2.0940 4.9621
D2H_Bandwidth_pinned 256kB GB/sec 8.2286 8.2192 0.1671 7.2456 8.5286
D2H_Bandwidth_pinned 512kB GB/sec 12.7930 12.7062 0.4407 7.1196 13.0478
D2H_Bandwidth_pinned 1024kB GB/sec 17.5603 17.4938 0.3921 12.7184 17.7989
D2H_Bandwidth_pinned 2048kB GB/sec 21.6275 21.5591 0.2233 20.6073 21.8076
D2H_Bandwidth_pinned 4096kB GB/sec 24.2708 24.2556 0.0942 23.5724 24.4292
D2H_Bandwidth_pinned 8192kB GB/sec 24.9287 24.9093 0.0733 24.7171 25.0359
D2H_Bandwidth_pinned 16384kB GB/sec 26.4588 26.1976 2.4387 1.9387 26.5191
D2H_Bandwidth_pinned 32768kB GB/sec 27.2939 27.1202 0.7941 23.2086 27.3277
D2H_Bandwidth_pinned 65536kB GB/sec 26.8278 26.7238 0.3894 24.7946 26.9000
D2H_Bandwidth_pinned 131072kB GB/sec 27.4751 27.3457 0.3968 25.4168 27.5098
D2H_Bandwidth_pinned 262144kB GB/sec 27.8236 27.7173 0.3072 26.7977 27.8525
D2H_Bandwidth_pinned 524288kB GB/sec 28.0193 27.9348 0.1912 27.4707 28.0314
D2H_Time_pinned +064By ms 0.0229 0.0246 0.0457 0.0216 1.4690
D2H_Time_pinned +256By ms 0.0232 0.0234 0.0013 0.0221 0.0378
D2H_Time_pinned +512By ms 0.0234 0.0238 0.0063 0.0224 0.2091
D2H_Time_pinned 1kB ms 0.0234 0.0236 0.0028 0.0224 0.0875
D2H_Time_pinned 2kB ms 0.0235 0.0237 0.0014 0.0224 0.0482
D2H_Time_pinned 4kB ms 0.0235 0.0239 0.0031 0.0226 0.0794
D2H_Time_pinned 8kB ms 0.0237 0.0240 0.0027 0.0226 0.0738
D2H_Time_pinned 16kB ms 0.0240 0.0242 0.0009 0.0232 0.0285
D2H_Time_pinned 32kB ms 0.0245 0.0248 0.0021 0.0235 0.0563
D2H_Time_pinned 64kB ms 0.0254 0.0257 0.0011 0.0242 0.0304
D2H_Time_pinned 128kB ms 0.0272 0.0275 0.0026 0.0264 0.0626
D2H_Time_pinned 256kB ms 0.0318 0.0319 0.0007 0.0307 0.0362
D2H_Time_pinned 512kB ms 0.0410 0.0413 0.0024 0.0402 0.0736
D2H_Time_pinned 1024kB ms 0.0597 0.0599 0.0017 0.0589 0.0824
D2H_Time_pinned 2048kB ms 0.0970 0.0973 0.0010 0.0962 0.1018
D2H_Time_pinned 4096kB ms 0.1728 0.1729 0.0007 0.1717 0.1779
D2H_Time_pinned 8192kB ms 0.3365 0.3367 0.0010 0.3350 0.3394
D2H_Time_pinned 16384kB ms 0.6341 0.7147 0.7979 0.6326 8.6538
D2H_Time_pinned 32768kB ms 1.2294 1.2385 0.0420 1.2278 1.4458
D2H_Time_pinned 65536kB ms 2.5014 2.5117 0.0391 2.4947 2.7066
D2H_Time_pinned 131072kB ms 4.8850 4.9092 0.0748 4.8789 5.2806
D2H_Time_pinned 262144kB ms 9.6478 9.6860 0.1106 9.6377 10.0171
D2H_Time_pinned 524288kB ms 19.1607 19.2196 0.1333 19.1525 19.5434
Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
"""
raw_output = [raw_output_h2d, raw_output_d2h]
for i, metric in enumerate(['h2d_bw', 'd2h_bw']):
assert (benchmark._process_raw_result(i, raw_output[i]))
assert (metric in benchmark.result)
......
......@@ -3,36 +3,29 @@
"""Tests for tensorrt-inference benchmark."""
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
from superbench.benchmarks.result import BenchmarkResult
class TensorRTInferenceBenchmarkTestCase(unittest.TestCase):
class TensorRTInferenceBenchmarkTestCase(BenchmarkTestCase, unittest.TestCase):
"""Class for tensorrt-inferencee benchmark test cases."""
def setUp(self):
"""Hook method for setting up the test fixture before exercising it."""
self.benchmark_name = 'tensorrt-inference'
self.__tmp_dir = tempfile.mkdtemp()
self.__model_path = Path(self.__tmp_dir) / 'hub' / 'onnx'
self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '')
os.environ['TORCH_HOME'] = self.__tmp_dir
os.environ['SB_MICRO_PATH'] = self.__tmp_dir
(Path(self.__tmp_dir) / 'bin').mkdir(parents=True, exist_ok=True)
(Path(self.__tmp_dir) / 'bin' / 'trtexec').touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Hook method for deconstructing the test fixture after testing it."""
shutil.rmtree(self.__tmp_dir)
os.environ['SB_MICRO_PATH'] = self.__curr_micro_path
del os.environ['TORCH_HOME']
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.benchmark_name = 'tensorrt-inference'
cls._model_path = Path(cls._tmp_dir) / 'hub' / 'onnx'
cls.createMockEnvs(cls, {
'TORCH_HOME': cls._tmp_dir,
'SB_MICRO_PATH': cls._tmp_dir,
})
cls.createMockFiles(cls, ['bin/trtexec'])
def test_tensorrt_inference_cls(self):
"""Test tensorrt-inference benchmark class."""
......@@ -116,7 +109,7 @@ def test_tensorrt_inference_params(self):
# Check models
for model in benchmark._args.pytorch_models:
self.assertTrue((self.__model_path / f'{model}.onnx').is_file())
self.assertTrue((self._model_path / f'{model}.onnx').is_file())
# Command list should equal to default model number
self.assertEqual(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment