Unverified commit 020a63c6, authored by Yifan Xiong and committed by GitHub
Browse files

Tests - Refine test cases for microbenchmark (#268)

__Description__

Refine test cases for microbenchmark:
* Refine test fixture, add BenchmarkTestCase class.
* Refine test data.
* Resolve the failure of the test_ib_loopback_util case on systems without NUMA support.
parent a15f773b
......@@ -3,29 +3,20 @@
"""Tests for cpu-memory-bw-latency benchmark."""
from pathlib import Path
import os
import unittest
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class CpuMemBwLatencyBenchmarkTest(unittest.TestCase):
class CpuMemBwLatencyBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for cpu-memory-bw-latency benchmark."""
def setUp(self):
    """Prepare the per-test fixture: redirect SB_MICRO_PATH and create a fake ``mlc`` binary."""
    # Keep the previous SB_MICRO_PATH ('' when it was unset) for later restoration.
    self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '')
    os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'

    # An empty file is enough here; it is a fake binary just for testing.
    bin_dir = Path(os.environ['SB_MICRO_PATH']) / 'bin'
    bin_dir.mkdir(parents=True, exist_ok=True)
    self.__binary_file = bin_dir.joinpath('mlc')
    self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Undo the fixture after each test: delete the fake binary, then restore SB_MICRO_PATH."""
    fake_binary = self.__binary_file
    fake_binary.unlink()
    # Put back the value that was saved before the test overrode it.
    previous_micro_path = self.__curr_micro_path
    os.environ['SB_MICRO_PATH'] = previous_micro_path
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative path handed to createMockFiles; presumably the fake binary to create.
    mock_files = ['bin/mlc']
    cls.createMockFiles(cls, mock_files)
def test_cpu_mem_bw_latency_benchmark_empty_param(self):
"""Test cpu-memory-bw-latency benchmark command generation with empty parameter."""
......
......@@ -3,29 +3,22 @@
"""Tests for gemm-flops benchmark."""
import os
import unittest
from pathlib import Path
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.common.utils import device_manager as dm
from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType
class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
class CudaGemmFlopsBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for CudaGemmFlopsBenchmark benchmark."""
def setUp(self):
    """Prepare the per-test fixture: set SB_MICRO_PATH and create a fake ``cutlass_profiler``."""
    micro_path = '/tmp/superbench/'
    os.environ['SB_MICRO_PATH'] = micro_path

    # An empty file is enough here; it is a fake binary just for testing.
    bin_dir = Path(micro_path, 'bin')
    bin_dir.mkdir(parents=True, exist_ok=True)
    self.__binary_file = bin_dir / 'cutlass_profiler'
    self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Remove the fake binary created for the test once its result has been recorded."""
    fake_binary = self.__binary_file
    fake_binary.unlink()
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative path handed to createMockFiles; presumably the fake binary to create.
    mock_files = ['bin/cutlass_profiler']
    cls.createMockFiles(cls, mock_files)
@decorator.cuda_test
def test_flops_performance_cuda(self):
......
......@@ -3,28 +3,22 @@
"""Tests for disk-performance benchmark."""
from pathlib import Path
from unittest import mock
import os
import unittest
from unittest import mock
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class DiskBenchmarkTest(unittest.TestCase):
class DiskBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for disk-performance benchmark."""
def setUp(self):
    """Prepare the per-test fixture: set SB_MICRO_PATH and create a fake ``fio`` binary."""
    os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'

    # An empty file is enough here; it is a fake binary just for testing.
    bin_dir = Path(os.environ['SB_MICRO_PATH']).joinpath('bin')
    bin_dir.mkdir(parents=True, exist_ok=True)
    fake_binary = bin_dir / 'fio'
    self.__binary_file = fake_binary
    fake_binary.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Remove the fake ``fio`` binary once the test result has been recorded."""
    fake_binary = self.__binary_file
    fake_binary.unlink()
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative path handed to createMockFiles; presumably the fake binary to create.
    mock_files = ['bin/fio']
    cls.createMockFiles(cls, mock_files)
def test_disk_performance_empty_param(self):
"""Test disk-performance benchmark command generation with empty parameter."""
......@@ -178,7 +172,8 @@ def test_disk_performance_benchmark_enabled(self, mock_is_block_device):
assert ('--rwmixread=%d' % default_rwmixread in benchmark._commands[command_idx])
command_idx += 1
def test_disk_performance_result_parsing(self):
@decorator.load_data('tests/data/disk_performance.log')
def test_disk_performance_result_parsing(self, test_raw_output):
"""Test disk-performance benchmark result parsing."""
benchmark_name = 'disk-benchmark'
(benchmark_class,
......@@ -193,317 +188,6 @@ def test_disk_performance_result_parsing(self):
assert (benchmark.type == BenchmarkType.MICRO)
# Positive case - valid raw output.
test_raw_output = """
{
"fio version" : "fio-3.16",
"timestamp" : 1626763278,
"timestamp_ms" : 1626763278577,
"time" : "Tue Jul 20 06:41:18 2021",
"global options" : {
"filename" : "/dev/nvme0n1",
"ramp_time" : "10s",
"runtime" : "30s",
"iodepth" : "64",
"numjobs" : "4",
"randrepeat" : "1",
"thread" : "1",
"ioengine" : "libaio",
"direct" : "1",
"norandommap" : "1",
"lat_percentiles" : "1",
"group_reporting" : "1"
},
"jobs" : [
{
"jobname" : "rand_read_write",
"groupid" : 0,
"error" : 0,
"eta" : 0,
"elapsed" : 41,
"job options" : {
"name" : "rand_read",
"rw" : "randrw",
"bs" : "4096",
"time_based" : "1"
},
"read" : {
"io_bytes" : 10463010816,
"io_kbytes" : 10217784,
"bw_bytes" : 348743777,
"bw" : 340570,
"iops" : 85138.890741,
"runtime" : 30002,
"total_ios" : 2554337,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1332,
"max" : 48691,
"mean" : 2032.588341,
"stddev" : 864.921965
},
"clat_ns" : {
"min" : 278533,
"max" : 10175655,
"mean" : 1444476.063469,
"stddev" : 300748.583131
},
"lat_ns" : {
"min" : 280646,
"max" : 10177629,
"mean" : 1446562.147113,
"stddev" : 300723.879349,
"percentile" : {
"1.000000" : 872448,
"5.000000" : 1036288,
"10.000000" : 1122304,
"20.000000" : 1220608,
"30.000000" : 1286144,
"40.000000" : 1351680,
"50.000000" : 1417216,
"60.000000" : 1482752,
"70.000000" : 1564672,
"80.000000" : 1662976,
"90.000000" : 1810432,
"95.000000" : 1941504,
"99.000000" : 2244608,
"99.500000" : 2408448,
"99.900000" : 3620864,
"99.950000" : 4358144,
"99.990000" : 6062080
}
},
"bw_min" : 291288,
"bw_max" : 380288,
"bw_agg" : 99.999134,
"bw_mean" : 340567.050000,
"bw_dev" : 6222.338382,
"bw_samples" : 240,
"iops_min" : 72822,
"iops_max" : 95072,
"iops_mean" : 85141.733333,
"iops_stddev" : 1555.582888,
"iops_samples" : 240
},
"write" : {
"io_bytes" : 10454208512,
"io_kbytes" : 10209188,
"bw_bytes" : 348450387,
"bw" : 340283,
"iops" : 85066.128925,
"runtime" : 30002,
"total_ios" : 2552154,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1383,
"max" : 315361,
"mean" : 2182.824623,
"stddev" : 919.625590
},
"clat_ns" : {
"min" : 433904,
"max" : 6300941,
"mean" : 1558511.433458,
"stddev" : 207734.850159
},
"lat_ns" : {
"min" : 441909,
"max" : 6302845,
"mean" : 1560749.444938,
"stddev" : 207695.144244,
"percentile" : {
"1.000000" : 1155072,
"5.000000" : 1269760,
"10.000000" : 1318912,
"20.000000" : 1384448,
"30.000000" : 1449984,
"40.000000" : 1499136,
"50.000000" : 1531904,
"60.000000" : 1597440,
"70.000000" : 1646592,
"80.000000" : 1728512,
"90.000000" : 1826816,
"95.000000" : 1908736,
"99.000000" : 2072576,
"99.500000" : 2179072,
"99.900000" : 2605056,
"99.950000" : 3031040,
"99.990000" : 4358144
}
},
"bw_min" : 288464,
"bw_max" : 380080,
"bw_agg" : 99.998134,
"bw_mean" : 340276.650000,
"bw_dev" : 6293.894521,
"bw_samples" : 240,
"iops_min" : 72116,
"iops_max" : 95020,
"iops_mean" : 85069.133333,
"iops_stddev" : 1573.475038,
"iops_samples" : 240
},
"trim" : {
"io_bytes" : 0,
"io_kbytes" : 0,
"bw_bytes" : 0,
"bw" : 0,
"iops" : 0.000000,
"runtime" : 0,
"total_ios" : 0,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"clat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000,
"percentile" : {
"1.000000" : 0,
"5.000000" : 0,
"10.000000" : 0,
"20.000000" : 0,
"30.000000" : 0,
"40.000000" : 0,
"50.000000" : 0,
"60.000000" : 0,
"70.000000" : 0,
"80.000000" : 0,
"90.000000" : 0,
"95.000000" : 0,
"99.000000" : 0,
"99.500000" : 0,
"99.900000" : 0,
"99.950000" : 0,
"99.990000" : 0
}
},
"bw_min" : 0,
"bw_max" : 0,
"bw_agg" : 0.000000,
"bw_mean" : 0.000000,
"bw_dev" : 0.000000,
"bw_samples" : 0,
"iops_min" : 0,
"iops_max" : 0,
"iops_mean" : 0.000000,
"iops_stddev" : 0.000000,
"iops_samples" : 0
},
"sync" : {
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"total_ios" : 0
},
"job_runtime" : 120004,
"usr_cpu" : 4.833172,
"sys_cpu" : 20.800973,
"ctx" : 3542118,
"majf" : 0,
"minf" : 1263,
"iodepth_level" : {
"1" : 0.000000,
"2" : 0.000000,
"4" : 0.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
">=64" : 100.000000
},
"iodepth_submit" : {
"0" : 0.000000,
"4" : 100.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.000000,
">=64" : 0.000000
},
"iodepth_complete" : {
"0" : 0.000000,
"4" : 99.999922,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.100000,
">=64" : 0.000000
},
"latency_ns" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000
},
"latency_us" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.010000,
"750" : 0.070126,
"1000" : 1.756079
},
"latency_ms" : {
"2" : 95.414131,
"4" : 2.722457,
"10" : 0.040830,
"20" : 0.010000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000,
"2000" : 0.000000,
">=2000" : 0.000000
},
"latency_depth" : 64,
"latency_target" : 0,
"latency_percentile" : 100.000000,
"latency_window" : 0
}
],
"disk_util" : [
{
"name" : "nvme0n1",
"read_ios" : 3004914,
"write_ios" : 3003760,
"read_merges" : 0,
"write_merges" : 0,
"read_ticks" : 4269143,
"write_ticks" : 4598453,
"in_queue" : 11104,
"util" : 99.840351
}
]
}
"""
jobname_prefix = 'nvme0n1_rand_read_write'
assert (benchmark._process_raw_result(0, test_raw_output))
assert (benchmark.return_code == ReturnCode.SUCCESS)
......
......@@ -3,66 +3,27 @@
"""Tests for GPCNet benchmark."""
import os
import numbers
import unittest
from pathlib import Path
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType
class GPCNetBenchmarkTest(unittest.TestCase): # noqa: E501
class GPCNetBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for GPCNetBenchmark benchmark."""
def setUp(self):
    """Prepare the per-test fixture: set SB_MICRO_PATH and create both fake GPCNet binaries."""
    os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
    bin_dir = Path(os.environ['SB_MICRO_PATH'], 'bin')
    bin_dir.mkdir(parents=True, exist_ok=True)

    # Record every fake binary that gets created so it can be removed afterwards.
    self.__binary_files = []
    for tool_name in ('network_test', 'network_load_test'):
        fake_binary = bin_dir / tool_name
        self.__binary_files.append(fake_binary)
        fake_binary.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Remove every fake binary recorded for the test once its result has been recorded."""
    recorded_binaries = self.__binary_files
    for fake_binary in recorded_binaries:
        fake_binary.unlink()
def test_gpcnet_network_test(self):
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative paths handed to createMockFiles; presumably the fake binaries to create.
    mock_files = ['bin/network_test', 'bin/network_load_test']
    cls.createMockFiles(cls, mock_files)
@decorator.load_data('tests/data/gpcnet_network_test.log')
@decorator.load_data('tests/data/gpcnet_network_test_error.log')
def test_gpcnet_network_test(self, raw_output, raw_output_no_execution):
"""Test gpcnet-network-test benchmark."""
raw_output = """# noqa: E501
Network Tests v1.3
Test with 2 MPI ranks (2 nodes)
Legend
RR = random ring communication pattern
Nat = natural ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+
| Name | Avg | 99% | Units |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Get Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Put BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Nat Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| Multiple Alltoall (4096 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
"""
# Check registry.
benchmark_name = 'gpcnet-network-test'
(benchmark_class,
......@@ -78,20 +39,6 @@ def test_gpcnet_network_test(self):
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
raw_output_no_execution = """
ERROR: this application must be run on at least 2 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
"""
assert (benchmark._process_raw_result(0, raw_output_no_execution))
assert (len(benchmark.result) == benchmark.default_metric_count)
......@@ -123,107 +70,10 @@ def test_gpcnet_network_test(self):
assert (benchmark.type == BenchmarkType.MICRO)
assert (benchmark._bin_name == 'network_test')
def test_gpcnet_network_load(self): # noqa: C901
@decorator.load_data('tests/data/gpcnet_network_load.log')
@decorator.load_data('tests/data/gpcnet_network_load_error.log')
def test_gpcnet_network_load(self, raw_output, raw_output_no_execution):
"""Test gpcnet-network-load-test benchmark."""
raw_output = """# noqa: E501
NetworkLoad Tests v1.3
Test with 10 MPI ranks (10 nodes)
2 nodes running Network Tests
8 nodes running Congestion Tests (min 100 nodes per congestor)
Legend
RR = random ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Congestion Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( RR Two-sided Lat Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( Multiple Allreduce Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests - Key Results |
+---------------------------------+--------------------------------------------+
| Name | Congestion Impact Factor |
+---------------------------------+----------------------+---------------------+
| | Avg | 99% |
+---------------------------------+----------------------+---------------------+
| RR Two-sided Lat (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| RR Two-sided BW+Sync (131072 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| Multiple Allreduce (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
"""
# Check registry.
benchmark_name = 'gpcnet-network-load-test'
(benchmark_class,
......@@ -240,20 +90,6 @@ def test_gpcnet_network_load(self): # noqa: C901
assert (command == expect_command)
# Check function process_raw_data.
raw_output_no_execution = """
ERROR: this application must be run on at least 10 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
"""
assert (benchmark._process_raw_result(0, raw_output_no_execution))
assert (len(benchmark.result) == benchmark.default_metric_count)
# Positive case - valid raw output.
......
......@@ -3,29 +3,22 @@
"""Tests for gpu-copy-bw benchmark."""
from pathlib import Path
import numbers
import os
import unittest
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class GpuCopyBwBenchmarkTest(unittest.TestCase):
class GpuCopyBwBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for gpu-copy-bw benchmark."""
def setUp(self):
    """Prepare the per-test fixture: set SB_MICRO_PATH and create a fake ``gpu_copy`` binary."""
    micro_path = '/tmp/superbench/'
    os.environ['SB_MICRO_PATH'] = micro_path

    # An empty file is enough here; it is a fake binary just for testing.
    bin_dir = Path(micro_path) / 'bin'
    bin_dir.mkdir(parents=True, exist_ok=True)
    self.__binary_file = bin_dir.joinpath('gpu_copy')
    self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Remove the fake ``gpu_copy`` binary once the test result has been recorded."""
    fake_binary = self.__binary_file
    fake_binary.unlink()
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative path handed to createMockFiles; presumably the fake binary to create.
    mock_files = ['bin/gpu_copy']
    cls.createMockFiles(cls, mock_files)
def _test_gpu_copy_bw_performance_command_generation(self, platform):
"""Test gpu-copy benchmark command generation."""
......
......@@ -6,113 +6,42 @@
import os
import numbers
import unittest
from pathlib import Path
from unittest import mock
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode
from superbench.common.utils import network
from superbench.benchmarks.micro_benchmarks import ib_loopback_performance
class IBLoopbackBenchmarkTest(unittest.TestCase):
class IBLoopbackBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for IBLoopbackBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# NOTE(review): indentation is lost in this rendering. The statements below
# presumably all belong to the body of this `if`, i.e. the fake binary is
# only created when no IB device is reported -- confirm against the repository.
if (len(network.get_ib_devices()) < 1):
# Create fake binary file just for testing.
# SB_MICRO_PATH is overwritten here; nothing in this method saves the previous value.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
# parents/exist_ok make the directory creation repeatable across tests.
binary_path.mkdir(parents=True, exist_ok=True)
# The created path is kept on the instance under a name-mangled attribute.
self.__binary_file = Path(binary_path, 'run_perftest_loopback')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
    """Remove the fake loopback binary after the test, but only when no IB device is reported."""
    ib_device_count = len(network.get_ib_devices())
    if ib_device_count < 1:
        self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
    """Set up the class-level fixture once, before any test in the class runs.

    Runs the parent class hook first, then calls the ``createMockEnvs`` and
    ``createMockFiles`` helpers (defined outside this view) with the class
    itself as their first argument.
    """
    super().setUpClass()
    cls.createMockEnvs(cls)
    # Relative path handed to createMockFiles; presumably the fake binary to create.
    mock_files = ['bin/run_perftest_loopback']
    cls.createMockFiles(cls, mock_files)
def test_ib_loopback_util(self):
    """Check the 'get_free_port' and 'get_numa_cores' helpers used by the ib-loopback benchmark."""
    free_port = network.get_free_port()
    assert (isinstance(free_port, numbers.Number))

    cores = ib_loopback_performance.get_numa_cores(0)
    # None covers test systems with no NUMA support; the core checks are skipped there.
    if cores is not None:
        assert (len(cores) >= 2)
        for core in cores:
            assert (isinstance(core, numbers.Number))
@decorator.load_data('tests/data/ib_loopback_all_sizes.log')
@mock.patch('superbench.common.utils.network.get_free_port')
@mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores')
@mock.patch('superbench.common.utils.network.get_ib_devices')
def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port):
def test_ib_loopback_all_sizes(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port):
"""Test ib-loopback benchmark for all sizes."""
raw_output = """
************************************
* Waiting for client to connect... *
************************************
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
ibv_wr* API : ON
TX depth : 128
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
ibv_wr* API : ON
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000
local address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000
remote address: LID 0xd06 QPN 0x092e PSN 0x3eb82d RKey 0x080228 VAddr 0x007f19adcbf000
remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97ff50000
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
2 2000 5.32 5.30 2.778732
4 2000 10.65 10.64 2.788833
8 2000 21.30 21.27 2.787609
16 2000 42.60 42.55 2.788268
32 2000 84.90 82.82 2.713896
64 2000 173.55 171.66 2.812504
128 2000 362.27 353.83 2.898535
256 2000 687.82 679.37 2.782698
512 2000 1337.12 1311.59 2.686135
1024 2000 2674.25 2649.39 2.712980
2048 2000 5248.56 5118.18 2.620509
4096 2000 10034.02 9948.41 2.546793
8192 2000 18620.51 12782.56 1.636168
16384 2000 23115.27 16782.50 1.074080
32768 2000 22927.94 18586.03 0.594753
65536 2000 23330.56 21167.79 0.338685
131072 2000 22750.35 21443.14 0.171545
262144 2000 22673.63 22411.35 0.089645
524288 2000 22679.02 22678.86 0.045358
1048576 2000 22817.06 22816.86 0.022817
2097152 2000 22919.37 22919.27 0.011460
4194304 2000 23277.93 23277.91 0.005819
8388608 2000 23240.68 23240.68 0.002905
---------------------------------------------------------------------------------------
8388608 2000 23240.68 23240.68 0.002905
---------------------------------------------------------------------------------------
"""
# Test without ib devices
# Check registry.
benchmark_name = 'ib-loopback'
......@@ -179,56 +108,12 @@ def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port
assert (benchmark._args.iters == 2000)
assert (benchmark._args.commands == ['write'])
@decorator.load_data('tests/data/ib_loopback_8M_size.log')
@mock.patch('superbench.common.utils.network.get_free_port')
@mock.patch('superbench.benchmarks.micro_benchmarks.ib_loopback_performance.get_numa_cores')
@mock.patch('superbench.common.utils.network.get_ib_devices')
def test_ib_loopback_8M_size(self, mock_ib_devices, mock_numa_cores, mock_port):
def test_ib_loopback_8M_size(self, raw_output, mock_ib_devices, mock_numa_cores, mock_port):
"""Test ib-loopback benchmark for 8M size."""
raw_output = """
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
TX depth : 128
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000
remote address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
8388608 20000 24056.74 24056.72 0.003007
************************************
* Waiting for client to connect... *
************************************
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : ibP257p0s0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
PCIe relax order: ON
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0xd06 QPN 0x095e PSN 0xbd024b RKey 0x080258 VAddr 0x007fe62504b000
remote address: LID 0xd06 QPN 0x095f PSN 0x3c9e82 RKey 0x080359 VAddr 0x007f9fc479c000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
8388608 20000 24056.74 24056.72 0.003007
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
"""
# Test without ib devices
# Check registry.
benchmark_name = 'ib-loopback'
......
......@@ -10,26 +10,26 @@
from unittest import mock
from collections import defaultdict
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType, ReturnCode
class IBBenchmarkTest(unittest.TestCase):
class IBBenchmarkTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for IBBenchmark benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
binary_path = Path(os.getenv('SB_MICRO_PATH'), 'bin')
binary_path.mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(binary_path, 'ib_validation')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/ib_validation'])
@classmethod
def tearDownClass(cls):
"""Hook method for deconstructing the class fixture after running all tests in the class."""
p = Path('hostfile')
if p.is_file():
p.unlink()
super().tearDownClass()
def test_generate_config(self): # noqa: C901
"""Test util functions ."""
......
......@@ -3,27 +3,20 @@
"""Tests for gemm-flops benchmark."""
import os
import unittest
from pathlib import Path
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, ReturnCode, Platform, BenchmarkType
class RocmGemmFlopsTest(unittest.TestCase):
class RocmGemmFlopsTest(BenchmarkTestCase, unittest.TestCase):
"""Tests for RocmGemmFlops benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(binary_path).mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(os.path.join(binary_path, 'rocblas-bench'))
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/rocblas-bench'])
def test_rocm_flops_performance(self):
"""Test gemm-flops benchmark."""
......
......@@ -4,29 +4,25 @@
"""Tests for mem-bw benchmark."""
import numbers
from pathlib import Path
import os
import unittest
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
class RocmMemBwTest(unittest.TestCase):
class RocmMemBwTest(BenchmarkTestCase, unittest.TestCase):
"""Test class for rocm mem-bw benchmark."""
def setUp(self):
"""Method called to prepare the test fixture."""
# Create fake binary file just for testing.
os.environ['SB_MICRO_PATH'] = '/tmp/superbench/'
binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
Path(os.getenv('SB_MICRO_PATH'), 'bin').mkdir(parents=True, exist_ok=True)
self.__binary_file = Path(binary_path, 'hipBusBandwidth')
self.__binary_file.touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
def test_rocm_memory_bw_performance(self):
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.createMockEnvs(cls)
cls.createMockFiles(cls, ['bin/hipBusBandwidth'])
@decorator.load_data('tests/data/rocm_memory_h2d_bw.log')
@decorator.load_data('tests/data/rocm_memory_d2h_bw.log')
def test_rocm_memory_bw_performance(self, raw_output_h2d, raw_output_d2h):
"""Test rocm mem-bw benchmark."""
benchmark_name = 'mem-bw'
(benchmark_class,
......@@ -51,114 +47,7 @@ def test_rocm_memory_bw_performance(self):
assert (commnad == expected_command[i])
# Check results and metrics.
raw_output = {}
raw_output[0] = """
Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned
test atts units median mean stddev min max
H2D_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
H2D_Bandwidth_pinned 1kB GB/sec 0.0414 0.0411 0.0017 0.0189 0.0434
H2D_Bandwidth_pinned 2kB GB/sec 0.0828 0.0824 0.0018 0.0683 0.0862
H2D_Bandwidth_pinned 4kB GB/sec 0.1656 0.1652 0.0032 0.1374 0.1724
H2D_Bandwidth_pinned 8kB GB/sec 0.3268 0.3251 0.0117 0.1880 0.3425
H2D_Bandwidth_pinned 16kB GB/sec 0.6410 0.6365 0.0259 0.3597 0.6757
H2D_Bandwidth_pinned 32kB GB/sec 1.2422 1.2432 0.0278 0.9346 1.2987
H2D_Bandwidth_pinned 64kB GB/sec 2.3968 2.4161 0.1486 0.7242 2.6042
H2D_Bandwidth_pinned 128kB GB/sec 4.6786 4.6339 0.1310 4.1143 4.8162
H2D_Bandwidth_pinned 256kB GB/sec 7.8349 7.8369 0.1150 6.9093 8.0270
H2D_Bandwidth_pinned 512kB GB/sec 11.9963 11.9828 0.1287 11.2158 12.2201
H2D_Bandwidth_pinned 1024kB GB/sec 16.3342 16.3315 0.0956 16.0147 16.5823
H2D_Bandwidth_pinned 2048kB GB/sec 19.9790 19.9770 0.0853 19.7681 20.1635
H2D_Bandwidth_pinned 4096kB GB/sec 22.2706 22.2642 0.0552 22.0644 22.3847
H2D_Bandwidth_pinned 8192kB GB/sec 22.8232 22.7881 0.1669 21.3196 22.8930
H2D_Bandwidth_pinned 16384kB GB/sec 24.1521 24.1411 0.0429 24.0165 24.2162
H2D_Bandwidth_pinned 32768kB GB/sec 24.8695 24.7086 0.7491 20.6288 24.9035
H2D_Bandwidth_pinned 65536kB GB/sec 24.4840 24.0101 2.5769 6.1754 24.5292
H2D_Bandwidth_pinned 131072kB GB/sec 25.0487 24.9593 0.2601 24.1286 25.0711
H2D_Bandwidth_pinned 262144kB GB/sec 25.3280 25.2351 0.1788 24.8746 25.3498
H2D_Bandwidth_pinned 524288kB GB/sec 24.7523 24.6708 0.1586 24.3154 24.7880
H2D_Timepinned +064By ms 0.0245 0.0253 0.0240 0.0232 0.7821
H2D_Timepinned +256By ms 0.0243 0.0244 0.0013 0.0232 0.0546
H2D_Timepinned +512By ms 0.0243 0.0244 0.0014 0.0230 0.0566
H2D_Timepinned 1kB ms 0.0242 0.0244 0.0016 0.0230 0.0530
H2D_Timepinned 2kB ms 0.0242 0.0243 0.0005 0.0232 0.0293
H2D_Timepinned 4kB ms 0.0242 0.0242 0.0005 0.0232 0.0291
H2D_Timepinned 8kB ms 0.0245 0.0247 0.0013 0.0234 0.0426
H2D_Timepinned 16kB ms 0.0250 0.0252 0.0015 0.0237 0.0445
H2D_Timepinned 32kB ms 0.0258 0.0258 0.0006 0.0246 0.0342
H2D_Timepinned 64kB ms 0.0271 0.0272 0.0045 0.0250 0.0898
H2D_Timepinned 128kB ms 0.0280 0.0283 0.0008 0.0272 0.0318
H2D_Timepinned 256kB ms 0.0334 0.0334 0.0005 0.0326 0.0379
H2D_Timepinned 512kB ms 0.0437 0.0437 0.0005 0.0429 0.0467
H2D_Timepinned 1024kB ms 0.0642 0.0642 0.0004 0.0632 0.0654
H2D_Timepinned 2048kB ms 0.1050 0.1050 0.0004 0.1040 0.1061
H2D_Timepinned 4096kB ms 0.1883 0.1884 0.0005 0.1874 0.1901
H2D_Timepinned 8192kB ms 0.3675 0.3681 0.0028 0.3664 0.3934
H2D_Timepinned 16384kB ms 0.6946 0.6950 0.0012 0.6928 0.6986
H2D_Timepinned 32768kB ms 1.3492 1.3595 0.0482 1.3474 1.6266
H2D_Timepinned 65536kB ms 2.7409 2.9163 1.1368 2.7358 10.8670
H2D_Timepinned 131072kB ms 5.3582 5.3780 0.0576 5.3534 5.5626
H2D_Timepinned 262144kB ms 10.5983 10.6379 0.0761 10.5892 10.7915
H2D_Timepinned 524288kB ms 21.6897 21.7622 0.1411 21.6585 22.0794
Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
"""
raw_output[1] = """
Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned
test atts units median mean stddev min max
D2H_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000
D2H_Bandwidth_pinned 1kB GB/sec 0.0428 0.0426 0.0019 0.0114 0.0446
D2H_Bandwidth_pinned 2kB GB/sec 0.0850 0.0844 0.0034 0.0415 0.0893
D2H_Bandwidth_pinned 4kB GB/sec 0.1701 0.1687 0.0084 0.0504 0.1773
D2H_Bandwidth_pinned 8kB GB/sec 0.3378 0.3348 0.0168 0.1085 0.3546
D2H_Bandwidth_pinned 16kB GB/sec 0.6667 0.6606 0.0218 0.5618 0.6897
D2H_Bandwidth_pinned 32kB GB/sec 1.3072 1.2954 0.0663 0.5682 1.3605
D2H_Bandwidth_pinned 64kB GB/sec 2.5550 2.5339 0.0955 2.1382 2.6904
D2H_Bandwidth_pinned 128kB GB/sec 4.8162 4.7807 0.2331 2.0940 4.9621
D2H_Bandwidth_pinned 256kB GB/sec 8.2286 8.2192 0.1671 7.2456 8.5286
D2H_Bandwidth_pinned 512kB GB/sec 12.7930 12.7062 0.4407 7.1196 13.0478
D2H_Bandwidth_pinned 1024kB GB/sec 17.5603 17.4938 0.3921 12.7184 17.7989
D2H_Bandwidth_pinned 2048kB GB/sec 21.6275 21.5591 0.2233 20.6073 21.8076
D2H_Bandwidth_pinned 4096kB GB/sec 24.2708 24.2556 0.0942 23.5724 24.4292
D2H_Bandwidth_pinned 8192kB GB/sec 24.9287 24.9093 0.0733 24.7171 25.0359
D2H_Bandwidth_pinned 16384kB GB/sec 26.4588 26.1976 2.4387 1.9387 26.5191
D2H_Bandwidth_pinned 32768kB GB/sec 27.2939 27.1202 0.7941 23.2086 27.3277
D2H_Bandwidth_pinned 65536kB GB/sec 26.8278 26.7238 0.3894 24.7946 26.9000
D2H_Bandwidth_pinned 131072kB GB/sec 27.4751 27.3457 0.3968 25.4168 27.5098
D2H_Bandwidth_pinned 262144kB GB/sec 27.8236 27.7173 0.3072 26.7977 27.8525
D2H_Bandwidth_pinned 524288kB GB/sec 28.0193 27.9348 0.1912 27.4707 28.0314
D2H_Time_pinned +064By ms 0.0229 0.0246 0.0457 0.0216 1.4690
D2H_Time_pinned +256By ms 0.0232 0.0234 0.0013 0.0221 0.0378
D2H_Time_pinned +512By ms 0.0234 0.0238 0.0063 0.0224 0.2091
D2H_Time_pinned 1kB ms 0.0234 0.0236 0.0028 0.0224 0.0875
D2H_Time_pinned 2kB ms 0.0235 0.0237 0.0014 0.0224 0.0482
D2H_Time_pinned 4kB ms 0.0235 0.0239 0.0031 0.0226 0.0794
D2H_Time_pinned 8kB ms 0.0237 0.0240 0.0027 0.0226 0.0738
D2H_Time_pinned 16kB ms 0.0240 0.0242 0.0009 0.0232 0.0285
D2H_Time_pinned 32kB ms 0.0245 0.0248 0.0021 0.0235 0.0563
D2H_Time_pinned 64kB ms 0.0254 0.0257 0.0011 0.0242 0.0304
D2H_Time_pinned 128kB ms 0.0272 0.0275 0.0026 0.0264 0.0626
D2H_Time_pinned 256kB ms 0.0318 0.0319 0.0007 0.0307 0.0362
D2H_Time_pinned 512kB ms 0.0410 0.0413 0.0024 0.0402 0.0736
D2H_Time_pinned 1024kB ms 0.0597 0.0599 0.0017 0.0589 0.0824
D2H_Time_pinned 2048kB ms 0.0970 0.0973 0.0010 0.0962 0.1018
D2H_Time_pinned 4096kB ms 0.1728 0.1729 0.0007 0.1717 0.1779
D2H_Time_pinned 8192kB ms 0.3365 0.3367 0.0010 0.3350 0.3394
D2H_Time_pinned 16384kB ms 0.6341 0.7147 0.7979 0.6326 8.6538
D2H_Time_pinned 32768kB ms 1.2294 1.2385 0.0420 1.2278 1.4458
D2H_Time_pinned 65536kB ms 2.5014 2.5117 0.0391 2.4947 2.7066
D2H_Time_pinned 131072kB ms 4.8850 4.9092 0.0748 4.8789 5.2806
D2H_Time_pinned 262144kB ms 9.6478 9.6860 0.1106 9.6377 10.0171
D2H_Time_pinned 524288kB ms 19.1607 19.2196 0.1333 19.1525 19.5434
Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
"""
raw_output = [raw_output_h2d, raw_output_d2h]
for i, metric in enumerate(['h2d_bw', 'd2h_bw']):
assert (benchmark._process_raw_result(i, raw_output[i]))
assert (metric in benchmark.result)
......
......@@ -3,36 +3,29 @@
"""Tests for tensorrt-inference benchmark."""
import os
import shutil
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from tests.helper import decorator
from tests.helper.testcase import BenchmarkTestCase
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
from superbench.benchmarks.result import BenchmarkResult
class TensorRTInferenceBenchmarkTestCase(unittest.TestCase):
class TensorRTInferenceBenchmarkTestCase(BenchmarkTestCase, unittest.TestCase):
"""Class for tensorrt-inferencee benchmark test cases."""
def setUp(self):
"""Hook method for setting up the test fixture before exercising it."""
self.benchmark_name = 'tensorrt-inference'
self.__tmp_dir = tempfile.mkdtemp()
self.__model_path = Path(self.__tmp_dir) / 'hub' / 'onnx'
self.__curr_micro_path = os.environ.get('SB_MICRO_PATH', '')
os.environ['TORCH_HOME'] = self.__tmp_dir
os.environ['SB_MICRO_PATH'] = self.__tmp_dir
(Path(self.__tmp_dir) / 'bin').mkdir(parents=True, exist_ok=True)
(Path(self.__tmp_dir) / 'bin' / 'trtexec').touch(mode=0o755, exist_ok=True)
def tearDown(self):
"""Hook method for deconstructing the test fixture after testing it."""
shutil.rmtree(self.__tmp_dir)
os.environ['SB_MICRO_PATH'] = self.__curr_micro_path
del os.environ['TORCH_HOME']
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in the class."""
super().setUpClass()
cls.benchmark_name = 'tensorrt-inference'
cls._model_path = Path(cls._tmp_dir) / 'hub' / 'onnx'
cls.createMockEnvs(cls, {
'TORCH_HOME': cls._tmp_dir,
'SB_MICRO_PATH': cls._tmp_dir,
})
cls.createMockFiles(cls, ['bin/trtexec'])
def test_tensorrt_inference_cls(self):
"""Test tensorrt-inference benchmark class."""
......@@ -116,7 +109,7 @@ def test_tensorrt_inference_params(self):
# Check models
for model in benchmark._args.pytorch_models:
self.assertTrue((self.__model_path / f'{model}.onnx').is_file())
self.assertTrue((self._model_path / f'{model}.onnx').is_file())
# Command list should equal to default model number
self.assertEqual(
......
[CUDA Bandwidth Test] - Starting...
Running on...
Device 0: Tesla V100-PCIE-32GB
Shmoo Mode
.................................................................................
bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 0.1 GB/s, Time = 0.00004 s, Size = 2000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 0.8 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 1.2 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 0.4 GB/s, Time = 0.00001 s, Size = 5000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 7.0 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 9.0 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 10.0 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 6.1 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 12.0 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 13.1 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 8.9 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 9.5 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 9.8 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 19.0 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 22.0 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 0.7 GB/s, Time = 0.00004 s, Size = 26000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 28.1 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 30.1 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 32.0 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 14.6 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 20.9 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 22.7 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 23.5 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 24.8 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 44.1 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 27.2 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 48.0 GB/s, Time = 0.00000 s, Size = 48000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 28.5 GB/s, Time = 0.00000 s, Size = 50000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 60.2 GB/s, Time = 0.00000 s, Size = 60000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 42.7 GB/s, Time = 0.00000 s, Size = 70000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 8.4 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 55.6 GB/s, Time = 0.00000 s, Size = 90000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 59.6 GB/s, Time = 0.00000 s, Size = 100000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 127.9 GB/s, Time = 0.00000 s, Size = 200000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 183.1 GB/s, Time = 0.00000 s, Size = 300000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 270.2 GB/s, Time = 0.00000 s, Size = 400000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 15.5 GB/s, Time = 0.00003 s, Size = 500000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 399.2 GB/s, Time = 0.00000 s, Size = 600000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 172.1 GB/s, Time = 0.00000 s, Size = 700000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 27.5 GB/s, Time = 0.00003 s, Size = 800000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 71.3 GB/s, Time = 0.00001 s, Size = 900000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 502.2 GB/s, Time = 0.00000 s, Size = 1000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 59.4 GB/s, Time = 0.00003 s, Size = 2000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 348.7 GB/s, Time = 0.00001 s, Size = 3000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 519.4 GB/s, Time = 0.00001 s, Size = 4000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 422.3 GB/s, Time = 0.00001 s, Size = 5000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 447.9 GB/s, Time = 0.00001 s, Size = 6000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 225.3 GB/s, Time = 0.00003 s, Size = 7000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 146.0 GB/s, Time = 0.00005 s, Size = 8000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 190.9 GB/s, Time = 0.00005 s, Size = 9000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 301.1 GB/s, Time = 0.00003 s, Size = 10000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 192.8 GB/s, Time = 0.00006 s, Size = 11000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 243.9 GB/s, Time = 0.00005 s, Size = 12000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 328.7 GB/s, Time = 0.00004 s, Size = 13000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 621.2 GB/s, Time = 0.00002 s, Size = 14000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 682.5 GB/s, Time = 0.00002 s, Size = 15000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 686.3 GB/s, Time = 0.00002 s, Size = 16000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 693.1 GB/s, Time = 0.00003 s, Size = 18000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 707.0 GB/s, Time = 0.00003 s, Size = 20000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 714.4 GB/s, Time = 0.00003 s, Size = 22000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 719.4 GB/s, Time = 0.00003 s, Size = 24000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 723.2 GB/s, Time = 0.00004 s, Size = 26000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 726.7 GB/s, Time = 0.00004 s, Size = 28000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 728.8 GB/s, Time = 0.00004 s, Size = 30000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 724.2 GB/s, Time = 0.00004 s, Size = 32000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 735.3 GB/s, Time = 0.00005 s, Size = 36000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 741.1 GB/s, Time = 0.00005 s, Size = 40000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 44000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 748.9 GB/s, Time = 0.00006 s, Size = 48000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 754.1 GB/s, Time = 0.00007 s, Size = 52000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 757.4 GB/s, Time = 0.00007 s, Size = 56000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 758.5 GB/s, Time = 0.00008 s, Size = 60000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
[CUDA Bandwidth Test] - Starting...
Running on...
Device 0: Tesla V100-PCIE-32GB
Shmoo Mode
.................................................................................
bandwidthTest-D2H-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 0.5 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 0.9 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 1.1 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 1.9 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 2.6 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 3.3 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 3.7 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 4.5 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 4.9 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 5.3 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 5.6 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 6.0 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 6.2 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 6.5 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 7.4 GB/s, Time = 0.00000 s, Size = 28000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 7.6 GB/s, Time = 0.00000 s, Size = 30000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 7.9 GB/s, Time = 0.00000 s, Size = 32000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 8.0 GB/s, Time = 0.00000 s, Size = 34000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 8.3 GB/s, Time = 0.00000 s, Size = 36000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 8.5 GB/s, Time = 0.00000 s, Size = 38000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00000 s, Size = 40000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 8.7 GB/s, Time = 0.00000 s, Size = 42000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 9.3 GB/s, Time = 0.00000 s, Size = 44000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 9.4 GB/s, Time = 0.00000 s, Size = 46000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 9.5 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 10.1 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 10.4 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 10.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 10.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.0 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00002 s, Size = 300000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00003 s, Size = 400000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.7 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00006 s, Size = 800000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00007 s, Size = 900000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.8 GB/s, Time = 0.00008 s, Size = 1000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00015 s, Size = 2000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.0 GB/s, Time = 0.00023 s, Size = 3000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00031 s, Size = 4000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00038 s, Size = 5000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00046 s, Size = 6000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00053 s, Size = 7000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00061 s, Size = 8000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.5 GB/s, Time = 0.00072 s, Size = 9000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00076 s, Size = 10000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00084 s, Size = 11000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00091 s, Size = 12000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00099 s, Size = 13000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00106 s, Size = 14000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00114 s, Size = 15000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00122 s, Size = 16000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00137 s, Size = 18000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00152 s, Size = 20000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00167 s, Size = 22000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00183 s, Size = 24000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 12.9 GB/s, Time = 0.00202 s, Size = 26000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00213 s, Size = 28000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00228 s, Size = 30000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00243 s, Size = 32000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00273 s, Size = 36000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00304 s, Size = 40000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00334 s, Size = 44000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00364 s, Size = 48000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00395 s, Size = 52000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00425 s, Size = 56000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.2 GB/s, Time = 0.00455 s, Size = 60000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00487 s, Size = 64000000 bytes, NumDevsUsed = 1
bandwidthTest-D2H-Pinned, Bandwidth = 13.1 GB/s, Time = 0.00520 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
[CUDA Bandwidth Test] - Starting...
Running on...
Device 0: Tesla V100-PCIE-32GB
Shmoo Mode
.................................................................................
bandwidthTest-H2D-Pinned, Bandwidth = 0.4 GB/s, Time = 0.00000 s, Size = 1000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 0.7 GB/s, Time = 0.00000 s, Size = 2000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 1.0 GB/s, Time = 0.00000 s, Size = 3000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 1.4 GB/s, Time = 0.00000 s, Size = 4000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 1.7 GB/s, Time = 0.00000 s, Size = 5000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 2.0 GB/s, Time = 0.00000 s, Size = 6000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 2.3 GB/s, Time = 0.00000 s, Size = 7000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 2.5 GB/s, Time = 0.00000 s, Size = 8000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 2.7 GB/s, Time = 0.00000 s, Size = 9000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 2.9 GB/s, Time = 0.00000 s, Size = 10000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 3.2 GB/s, Time = 0.00000 s, Size = 11000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 3.4 GB/s, Time = 0.00000 s, Size = 12000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 13000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 3.5 GB/s, Time = 0.00000 s, Size = 14000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 3.8 GB/s, Time = 0.00000 s, Size = 15000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.0 GB/s, Time = 0.00000 s, Size = 16000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.1 GB/s, Time = 0.00000 s, Size = 17000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.3 GB/s, Time = 0.00000 s, Size = 18000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.4 GB/s, Time = 0.00000 s, Size = 19000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.6 GB/s, Time = 0.00000 s, Size = 20000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 4.8 GB/s, Time = 0.00000 s, Size = 22000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 5.0 GB/s, Time = 0.00000 s, Size = 24000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 5.2 GB/s, Time = 0.00000 s, Size = 26000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 5.4 GB/s, Time = 0.00001 s, Size = 28000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 5.7 GB/s, Time = 0.00001 s, Size = 30000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 5.9 GB/s, Time = 0.00001 s, Size = 32000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.1 GB/s, Time = 0.00001 s, Size = 34000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.3 GB/s, Time = 0.00001 s, Size = 36000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.4 GB/s, Time = 0.00001 s, Size = 38000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.6 GB/s, Time = 0.00001 s, Size = 40000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.7 GB/s, Time = 0.00001 s, Size = 42000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 6.9 GB/s, Time = 0.00001 s, Size = 44000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 7.0 GB/s, Time = 0.00001 s, Size = 46000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 7.1 GB/s, Time = 0.00001 s, Size = 48000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 7.3 GB/s, Time = 0.00001 s, Size = 50000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 7.8 GB/s, Time = 0.00001 s, Size = 60000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 8.2 GB/s, Time = 0.00001 s, Size = 70000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 8.6 GB/s, Time = 0.00001 s, Size = 80000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 8.9 GB/s, Time = 0.00001 s, Size = 90000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 9.2 GB/s, Time = 0.00001 s, Size = 100000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00002 s, Size = 200000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.1 GB/s, Time = 0.00003 s, Size = 300000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.4 GB/s, Time = 0.00004 s, Size = 400000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.6 GB/s, Time = 0.00004 s, Size = 500000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00005 s, Size = 600000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.8 GB/s, Time = 0.00006 s, Size = 700000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00007 s, Size = 800000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.9 GB/s, Time = 0.00008 s, Size = 900000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00009 s, Size = 1000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00016 s, Size = 2000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00024 s, Size = 3000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00033 s, Size = 4000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.5 GB/s, Time = 0.00043 s, Size = 5000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00049 s, Size = 6000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00057 s, Size = 7000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00065 s, Size = 8000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.3 GB/s, Time = 0.00073 s, Size = 9000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00081 s, Size = 10000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00089 s, Size = 11000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00097 s, Size = 12000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00105 s, Size = 13000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00113 s, Size = 14000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00121 s, Size = 15000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00129 s, Size = 16000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00145 s, Size = 18000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00162 s, Size = 20000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00178 s, Size = 22000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00194 s, Size = 24000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00210 s, Size = 26000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00226 s, Size = 28000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00242 s, Size = 30000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 10.5 GB/s, Time = 0.00304 s, Size = 32000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.2 GB/s, Time = 0.00295 s, Size = 36000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 10.8 GB/s, Time = 0.00369 s, Size = 40000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00355 s, Size = 44000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00387 s, Size = 48000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00431 s, Size = 52000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 11.7 GB/s, Time = 0.00480 s, Size = 56000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00484 s, Size = 60000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.1 GB/s, Time = 0.00528 s, Size = 64000000 bytes, NumDevsUsed = 1
bandwidthTest-H2D-Pinned, Bandwidth = 12.4 GB/s, Time = 0.00549 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
{
"fio version" : "fio-3.16",
"timestamp" : 1626763278,
"timestamp_ms" : 1626763278577,
"time" : "Tue Jul 20 06:41:18 2021",
"global options" : {
"filename" : "/dev/nvme0n1",
"ramp_time" : "10s",
"runtime" : "30s",
"iodepth" : "64",
"numjobs" : "4",
"randrepeat" : "1",
"thread" : "1",
"ioengine" : "libaio",
"direct" : "1",
"norandommap" : "1",
"lat_percentiles" : "1",
"group_reporting" : "1"
},
"jobs" : [
{
"jobname" : "rand_read_write",
"groupid" : 0,
"error" : 0,
"eta" : 0,
"elapsed" : 41,
"job options" : {
"name" : "rand_read",
"rw" : "randrw",
"bs" : "4096",
"time_based" : "1"
},
"read" : {
"io_bytes" : 10463010816,
"io_kbytes" : 10217784,
"bw_bytes" : 348743777,
"bw" : 340570,
"iops" : 85138.890741,
"runtime" : 30002,
"total_ios" : 2554337,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1332,
"max" : 48691,
"mean" : 2032.588341,
"stddev" : 864.921965
},
"clat_ns" : {
"min" : 278533,
"max" : 10175655,
"mean" : 1444476.063469,
"stddev" : 300748.583131
},
"lat_ns" : {
"min" : 280646,
"max" : 10177629,
"mean" : 1446562.147113,
"stddev" : 300723.879349,
"percentile" : {
"1.000000" : 872448,
"5.000000" : 1036288,
"10.000000" : 1122304,
"20.000000" : 1220608,
"30.000000" : 1286144,
"40.000000" : 1351680,
"50.000000" : 1417216,
"60.000000" : 1482752,
"70.000000" : 1564672,
"80.000000" : 1662976,
"90.000000" : 1810432,
"95.000000" : 1941504,
"99.000000" : 2244608,
"99.500000" : 2408448,
"99.900000" : 3620864,
"99.950000" : 4358144,
"99.990000" : 6062080
}
},
"bw_min" : 291288,
"bw_max" : 380288,
"bw_agg" : 99.999134,
"bw_mean" : 340567.050000,
"bw_dev" : 6222.338382,
"bw_samples" : 240,
"iops_min" : 72822,
"iops_max" : 95072,
"iops_mean" : 85141.733333,
"iops_stddev" : 1555.582888,
"iops_samples" : 240
},
"write" : {
"io_bytes" : 10454208512,
"io_kbytes" : 10209188,
"bw_bytes" : 348450387,
"bw" : 340283,
"iops" : 85066.128925,
"runtime" : 30002,
"total_ios" : 2552154,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 1383,
"max" : 315361,
"mean" : 2182.824623,
"stddev" : 919.625590
},
"clat_ns" : {
"min" : 433904,
"max" : 6300941,
"mean" : 1558511.433458,
"stddev" : 207734.850159
},
"lat_ns" : {
"min" : 441909,
"max" : 6302845,
"mean" : 1560749.444938,
"stddev" : 207695.144244,
"percentile" : {
"1.000000" : 1155072,
"5.000000" : 1269760,
"10.000000" : 1318912,
"20.000000" : 1384448,
"30.000000" : 1449984,
"40.000000" : 1499136,
"50.000000" : 1531904,
"60.000000" : 1597440,
"70.000000" : 1646592,
"80.000000" : 1728512,
"90.000000" : 1826816,
"95.000000" : 1908736,
"99.000000" : 2072576,
"99.500000" : 2179072,
"99.900000" : 2605056,
"99.950000" : 3031040,
"99.990000" : 4358144
}
},
"bw_min" : 288464,
"bw_max" : 380080,
"bw_agg" : 99.998134,
"bw_mean" : 340276.650000,
"bw_dev" : 6293.894521,
"bw_samples" : 240,
"iops_min" : 72116,
"iops_max" : 95020,
"iops_mean" : 85069.133333,
"iops_stddev" : 1573.475038,
"iops_samples" : 240
},
"trim" : {
"io_bytes" : 0,
"io_kbytes" : 0,
"bw_bytes" : 0,
"bw" : 0,
"iops" : 0.000000,
"runtime" : 0,
"total_ios" : 0,
"short_ios" : 0,
"drop_ios" : 0,
"slat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"clat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000,
"percentile" : {
"1.000000" : 0,
"5.000000" : 0,
"10.000000" : 0,
"20.000000" : 0,
"30.000000" : 0,
"40.000000" : 0,
"50.000000" : 0,
"60.000000" : 0,
"70.000000" : 0,
"80.000000" : 0,
"90.000000" : 0,
"95.000000" : 0,
"99.000000" : 0,
"99.500000" : 0,
"99.900000" : 0,
"99.950000" : 0,
"99.990000" : 0
}
},
"bw_min" : 0,
"bw_max" : 0,
"bw_agg" : 0.000000,
"bw_mean" : 0.000000,
"bw_dev" : 0.000000,
"bw_samples" : 0,
"iops_min" : 0,
"iops_max" : 0,
"iops_mean" : 0.000000,
"iops_stddev" : 0.000000,
"iops_samples" : 0
},
"sync" : {
"lat_ns" : {
"min" : 0,
"max" : 0,
"mean" : 0.000000,
"stddev" : 0.000000
},
"total_ios" : 0
},
"job_runtime" : 120004,
"usr_cpu" : 4.833172,
"sys_cpu" : 20.800973,
"ctx" : 3542118,
"majf" : 0,
"minf" : 1263,
"iodepth_level" : {
"1" : 0.000000,
"2" : 0.000000,
"4" : 0.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
">=64" : 100.000000
},
"iodepth_submit" : {
"0" : 0.000000,
"4" : 100.000000,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.000000,
">=64" : 0.000000
},
"iodepth_complete" : {
"0" : 0.000000,
"4" : 99.999922,
"8" : 0.000000,
"16" : 0.000000,
"32" : 0.000000,
"64" : 0.100000,
">=64" : 0.000000
},
"latency_ns" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000
},
"latency_us" : {
"2" : 0.000000,
"4" : 0.000000,
"10" : 0.000000,
"20" : 0.000000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.010000,
"750" : 0.070126,
"1000" : 1.756079
},
"latency_ms" : {
"2" : 95.414131,
"4" : 2.722457,
"10" : 0.040830,
"20" : 0.010000,
"50" : 0.000000,
"100" : 0.000000,
"250" : 0.000000,
"500" : 0.000000,
"750" : 0.000000,
"1000" : 0.000000,
"2000" : 0.000000,
">=2000" : 0.000000
},
"latency_depth" : 64,
"latency_target" : 0,
"latency_percentile" : 100.000000,
"latency_window" : 0
}
],
"disk_util" : [
{
"name" : "nvme0n1",
"read_ios" : 3004914,
"write_ios" : 3003760,
"read_merges" : 0,
"write_merges" : 0,
"read_ticks" : 4269143,
"write_ticks" : 4598453,
"in_queue" : 11104,
"util" : 99.840351
}
]
}
NetworkLoad Tests v1.3
Test with 10 MPI ranks (10 nodes)
2 nodes running Network Tests
8 nodes running Congestion Tests (min 100 nodes per congestor)
Legend
RR = random ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Isolated Congestion Tests |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( RR Two-sided Lat Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests ( Multiple Allreduce Network Test) |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Name | Min | Max | Avg | Avg(Worst) | 99% | 99.9% | Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Alltoall (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Two-sided Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Put Incast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| Get Bcast (4096 B) | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
+------------------------------------------------------------------------------+
| Network Tests running with Congestion Tests - Key Results |
+---------------------------------+--------------------------------------------+
| Name | Congestion Impact Factor |
+---------------------------------+----------------------+---------------------+
| | Avg | 99% |
+---------------------------------+----------------------+---------------------+
| RR Two-sided Lat (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| RR Two-sided BW+Sync (131072 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
| Multiple Allreduce (8 B) | 0.0X | 0.0X |
+---------------------------------+----------------------+---------------------+
ERROR: this application must be run on at least 10 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
Network Tests v1.3
Test with 2 MPI ranks (2 nodes)
Legend
RR = random ring communication pattern
Nat = natural ring communication pattern
Lat = latency
BW = bandwidth
BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------+
| Isolated Network Tests |
+---------------------------------+--------------+--------------+--------------+
| Name | Avg | 99% | Units |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Get Lat (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Put BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Nat Two-sided BW (131072 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| Multiple Allreduce (8 B) | 10000.0 | 10000.0 | usec |
+---------------------------------+--------------+--------------+--------------+
| Multiple Alltoall (4096 B) | 10000.0 | 10000.0 | MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
ERROR: this application must be run on at least 2 nodes
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63697,1],0]
Exit code: 1
--------------------------------------------------------------------------
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment