Unverified Commit 9f56b219 authored by Yuting Jiang, committed by GitHub

Benchmarks: Unify metric names of benchmarks (#252)

**Description**
Unify metric names across all benchmarks: lowercase names built as `{precision}_{model_action}_{measure}` for model benchmarks (e.g. `fp32_train_step_time`, `fp16_inference_throughput`), and short unit-style suffixes such as `_bw`, `_flops`, and `_time` for micro-benchmarks (e.g. `h2d_bw`, `fp32_flops`).
parent c13ed2a2
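To make the convention concrete, here is a minimal sketch; the `metric_name` helper and `PRECISION_ABBR` constant are illustrative only and are not code added by this PR, though the abbreviation mapping mirrors the `precision_metric` dict introduced in the diff below:

```python
# Illustrative sketch of the unified metric naming scheme (not superbenchmark code).
# PRECISION_ABBR mirrors the precision_metric dict added in this PR.
PRECISION_ABBR = {'float16': 'fp16', 'float32': 'fp32', 'float64': 'fp64', 'bfloat16': 'bf16'}


def metric_name(precision, model_action, measure):
    """Build a unified metric name, e.g. ('float32', 'train', 'step_time') -> 'fp32_train_step_time'."""
    short = PRECISION_ABBR.get(precision, precision)
    return '{}_{}_{}'.format(short, model_action, measure)


assert metric_name('float32', 'train', 'step_time') == 'fp32_train_step_time'
assert metric_name('float16', 'inference', 'throughput') == 'fp16_inference_throughput'
```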
@@ -373,7 +373,10 @@ def __process_model_result(self, model_action, precision, step_times):
)
return False
-metric = 'steptime_{}_{}'.format(model_action, precision)
+precision_metric = {'float16': 'fp16', 'float32': 'fp32', 'float64': 'fp64', 'bfloat16': 'bf16'}
+if precision.value in precision_metric.keys():
+precision = precision_metric[precision.value]
+metric = '{}_{}_step_time'.format(precision, model_action)
self._result.add_raw_data(metric, step_times)
avg = statistics.mean(step_times)
self._result.add_result(metric, avg, reduce_type=ReduceType.MAX if model_action is ModelAction.TRAIN else None)
@@ -381,7 +384,7 @@ def __process_model_result(self, model_action, precision, step_times):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second = 1000
throughput = [millisecond_per_second / step_time * self._args.batch_size for step_time in step_times]
-metric = 'throughput_{}_{}'.format(model_action, precision)
+metric = '{}_{}_throughput'.format(precision, model_action)
self._result.add_raw_data(metric, throughput)
avg = statistics.mean(throughput)
self._result.add_result(metric, avg, reduce_type=ReduceType.MIN if model_action is ModelAction.TRAIN else None)
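For reference, a quick sanity check of this formula under assumed values (2.0 ms step time, batch size 32; the product matches the `fp32_train_throughput` expectations in the fake-model tests further down in this diff):

```python
# Throughput (samples/sec) from a step time in milliseconds.
# step_time and batch_size are assumptions chosen for illustration,
# matching the fake-model test expectations later in this PR.
millisecond_per_second = 1000
step_time = 2.0    # milliseconds per step
batch_size = 32    # assumed batch size of the fake model
throughput = millisecond_per_second / step_time * batch_size
assert throughput == 16000.0    # cf. fp32_train_throughput expected below
```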
@@ -44,13 +44,13 @@ def test_rocm_onnxruntime_performance():
"samples_per_second": 274.455
"""
assert (benchmark._process_raw_result(0, raw_output))
-assert (benchmark.result['bert_large_uncased_ngpu_1'][0] == 21.829)
-assert (benchmark.result['bert_large_uncased_ngpu_8'][0] == 147.181)
-assert (benchmark.result['distilbert_base_uncased_ngpu_1'][0] == 126.827)
-assert (benchmark.result['distilbert_base_uncased_ngpu_8'][0] == 966.796)
-assert (benchmark.result['gpt2_ngpu_1'][0] == 20.46)
-assert (benchmark.result['gpt2_ngpu_8'][0] == 151.089)
-assert (benchmark.result['facebook_bart_large_ngpu_1'][0] == 66.171)
-assert (benchmark.result['facebook_bart_large_ngpu_8'][0] == 370.343)
-assert (benchmark.result['roberta_large_ngpu_1'][0] == 37.103)
-assert (benchmark.result['roberta_large_ngpu_8'][0] == 274.455)
+assert (benchmark.result['bert_large_uncased_ngpu_1_throughput'][0] == 21.829)
+assert (benchmark.result['bert_large_uncased_ngpu_8_throughput'][0] == 147.181)
+assert (benchmark.result['distilbert_base_uncased_ngpu_1_throughput'][0] == 126.827)
+assert (benchmark.result['distilbert_base_uncased_ngpu_8_throughput'][0] == 966.796)
+assert (benchmark.result['gpt2_ngpu_1_throughput'][0] == 20.46)
+assert (benchmark.result['gpt2_ngpu_8_throughput'][0] == 151.089)
+assert (benchmark.result['facebook_bart_large_ngpu_1_throughput'][0] == 66.171)
+assert (benchmark.result['facebook_bart_large_ngpu_8_throughput'][0] == 370.343)
+assert (benchmark.result['roberta_large_ngpu_1_throughput'][0] == 37.103)
+assert (benchmark.result['roberta_large_ngpu_8_throughput'][0] == 274.455)
@@ -38,7 +38,7 @@ def test_flops_performance_cuda(self):
# Negative case - MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE.
benchmark = benchmark_class(
benchmark_name,
-parameters='--num_warmup 200 --n 1024 --k 512 --m 2048 --precision FP32 TF32_TC FP16_TC INT8_TC'
+parameters='--num_warmup 200 --n 1024 --k 512 --m 2048 --precision fp32 tf32_tc fp16_tc int8_tc'
)
ret = benchmark._preprocess()
@@ -59,11 +59,11 @@ def test_flops_performance_cuda(self):
assert (benchmark._args.n == 1024)
assert (benchmark._args.k == 512)
assert (benchmark._args.m == 2048)
-assert (benchmark._args.precision == ['FP32', 'TF32_TC', 'FP16_TC', 'INT8_TC'])
-benchmark._CudaGemmFlopsBenchmark__precision_need_to_run = ['FP32', 'TF32_TC', 'FP16_TC', 'INT8_TC']
+assert (benchmark._args.precision == ['fp32', 'tf32_tc', 'fp16_tc', 'int8_tc'])
+benchmark._CudaGemmFlopsBenchmark__precision_need_to_run = ['fp32', 'tf32_tc', 'fp16_tc', 'int8_tc']
# Check results and metrics.
raw_output_FP32 = """
raw_output_fp32 = """
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
@@ -72,7 +72,7 @@ def test_flops_performance_cuda(self):
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tn_align1,passed,success,universal,16384,16384,16384,f32:row,f32:column,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,482.034,6.22363,18249
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tt_align1,passed,success,universal,16384,16384,16384,f32:row,f32:row,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,481.838,6.22616,18256.4
"""
raw_output_TF32_TC = """
raw_output_tf32_tc = """
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
@@ -81,7 +81,7 @@ def test_flops_performance_cuda(self):
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tn_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:column,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,86.5167,34.6754,101676
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tt_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:row,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,68.3621,43.884,128677
"""
raw_output_FP16_TC = """
raw_output_fp16_tc = """
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
@@ -90,13 +90,13 @@ def test_flops_performance_cuda(self):
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tn_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:column,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,39.0413,38.4209,225316
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tt_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:row,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,31.2994,47.9243,281048
"""
-assert (benchmark._process_raw_result(0, raw_output_FP32))
-assert (benchmark._process_raw_result(1, raw_output_TF32_TC))
-assert (benchmark._process_raw_result(2, raw_output_FP16_TC))
+assert (benchmark._process_raw_result(0, raw_output_fp32))
+assert (benchmark._process_raw_result(1, raw_output_tf32_tc))
+assert (benchmark._process_raw_result(2, raw_output_fp16_tc))
-assert (benchmark.result['FP32'][0] == 18369.7)
-assert (benchmark.result['TF32_TC'][0] == 128677)
-assert (benchmark.result['FP16_TC'][0] == 281048)
+assert (benchmark.result['fp32_flops'][0] == 18369.7)
+assert (benchmark.result['tf32_tc_flops'][0] == 128677)
+assert (benchmark.result['fp16_tc_flops'][0] == 281048)
# Negative case - Add invalid raw output.
assert (benchmark._process_raw_result(3, 'Invalid raw output') is False)
@@ -328,7 +328,7 @@ def test_cuda_memory_bw_performance(self):
bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
"""
-for i, metric in enumerate(['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']):
+for i, metric in enumerate(['h2d_bw', 'd2h_bw', 'd2d_bw']):
assert (benchmark._process_raw_result(i, raw_output[i]))
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
@@ -519,19 +519,19 @@ def test_disk_performance_result_parsing(self):
assert (1 == len(benchmark.result[jobname_prefix + '_write_iops']))
assert (85066.128925 == benchmark.result[jobname_prefix + '_write_iops'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_95.000000']))
-assert (1941504 == benchmark.result[jobname_prefix + '_read_lat_ns_95.000000'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_99.000000']))
-assert (2244608 == benchmark.result[jobname_prefix + '_read_lat_ns_99.000000'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_99.900000']))
-assert (3620864 == benchmark.result[jobname_prefix + '_read_lat_ns_99.900000'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_95.000000']))
-assert (1908736 == benchmark.result[jobname_prefix + '_write_lat_ns_95.000000'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_99.000000']))
-assert (2072576 == benchmark.result[jobname_prefix + '_write_lat_ns_99.000000'][0])
-assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_99.900000']))
-assert (2605056 == benchmark.result[jobname_prefix + '_write_lat_ns_99.900000'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_95.0']))
+assert (1941504 == benchmark.result[jobname_prefix + '_read_lat_ns_95.0'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_99.0']))
+assert (2244608 == benchmark.result[jobname_prefix + '_read_lat_ns_99.0'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_read_lat_ns_99.9']))
+assert (3620864 == benchmark.result[jobname_prefix + '_read_lat_ns_99.9'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_95.0']))
+assert (1908736 == benchmark.result[jobname_prefix + '_write_lat_ns_95.0'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_99.0']))
+assert (2072576 == benchmark.result[jobname_prefix + '_write_lat_ns_99.0'][0])
+assert (1 == len(benchmark.result[jobname_prefix + '_write_lat_ns_99.9']))
+assert (2605056 == benchmark.result[jobname_prefix + '_write_lat_ns_99.9'][0])
# Negative case - invalid raw output.
assert (benchmark._process_raw_result(1, 'Invalid raw output') is False)
@@ -72,7 +72,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
return True
-def test_memory_bw_performance_base():
+def test_gemm_flops_performance_base():
"""Test GemmFlopsBenchmark."""
# Positive case - memory=pinned.
benchmark = FakeGemmFlopsBenchmark('fake')
@@ -81,49 +81,49 @@ def test_memory_bw_performance_base():
assert (benchmark.return_code == ReturnCode.SUCCESS)
# Check command list
expected_command = [
'echo "--precision FP64 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP32 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP16 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP64_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision TF32_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision BF16_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP16_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision INT8_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision INT4_TC --m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision fp64 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp32 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp16 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp64_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision tf32_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision bf16_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp16_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision int8_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision int4_tc --m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
for i, metric in enumerate(
-['FP64', 'FP32', 'FP16', 'FP64_TC', 'TF32_TC', 'BF16_TC', 'FP16_TC', 'INT8_TC', 'INT4_TC']
+['fp64', 'fp32', 'fp16', 'fp64_tc', 'tf32_tc', 'bf16_tc', 'fp16_tc', 'int8_tc', 'int4_tc']
):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
# Positive case - memory=unpinned.
-benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision FP64 FP32 FP16')
+benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision fp64 fp32 fp16')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run())
assert (benchmark.return_code == ReturnCode.SUCCESS)
# Check command list
expected_command = [
'echo "--precision FP64 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP32 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision FP16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision fp64 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp32 --m 16384 --n 16384 --k 16384 --num_warmup 5"',
'echo "--precision fp16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
-for i, metric in enumerate(['FP64', 'FP32', 'FP16']):
+for i, metric in enumerate(['fp64', 'fp32', 'fp16']):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
-benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision FP64 BF64')
+benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision fp64 bf64')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run() is True)
# Negative case - INVALID_ARGUMENT.
-benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision BF64')
+benchmark = FakeGemmFlopsBenchmark('fake', parameters='--precision bf64')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run() is False)
assert (benchmark.return_code == ReturnCode.NO_SUPPORTED_PRECISION)
@@ -98,14 +98,19 @@ def test_gpcnet_network_test(self):
# Check function process_raw_data.
# Positive case - valid raw output.
assert (benchmark._process_raw_result(0, raw_output))
-test_name = 'IsolatedNetworkTests'
metric_list = [
-'RRTwo-sidedLat(8B)', 'RRGetLat(8B)', 'RRTwo-sidedBW(131072B)', 'RRPutBW(131072B)',
-'RRTwo-sidedBW+Sync(131072B)', 'NatTwo-sidedBW(131072B)', 'MultipleAllreduce(8B)', 'MultipleAlltoall(4096B)'
+'rr_two-sided_lat',
+'rr_get_lat',
+'rr_two-sided_bw',
+'rr_put_bw',
+'rr_two-sided+sync_bw',
+'nat_two-sided_bw',
+'multiple_allreduce_time',
+'multiple_alltoall_bw',
]
for metric_medium in metric_list:
-for suffix in ['Avg', '99%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
+for suffix in ['avg', '99%']:
+metric = metric_medium + '_' + suffix
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
@@ -253,58 +258,10 @@ def test_gpcnet_network_load(self): # noqa: C901
assert (len(benchmark.result) == benchmark.default_metric_count)
# Positive case - valid raw output.
assert (benchmark._process_raw_result(0, raw_output))
-test_name = 'IsolatedNetworkTests'
-metric_list = ['RRTwo-sidedLat(8B)', 'RRTwo-sidedBW+Sync(131072B)', 'MultipleAllreduce(8B)']
+metric_list = ['rr_two-sided_lat_x', 'rr_two-sided+sync_bw_x', 'multiple_allreduce_x']
for metric_medium in metric_list:
-for suffix in ['Max', 'Min', 'Avg', '99.9%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
-assert (metric in benchmark.result)
-assert (len(benchmark.result[metric]) == 1)
-assert (isinstance(benchmark.result[metric][0], numbers.Number))
-test_name = 'IsolatedCongestionTests'
-metric_list = ['GetBcast(4096B)', 'PutIncast(4096B)', 'Two-sidedIncast(4096B)', 'Alltoall(4096B)']
-for metric_medium in metric_list:
-for suffix in ['Max', 'Min', 'Avg', '99.9%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
-assert (metric in benchmark.result)
-assert (len(benchmark.result[metric]) == 1)
-assert (isinstance(benchmark.result[metric][0], numbers.Number))
-test_name = 'NetworkTestsrunningwithCongestionTests(RRTwo-sidedLatNetworkTest)'
-metric_list = [
-'GetBcast(4096B)', 'PutIncast(4096B)', 'Two-sidedIncast(4096B)', 'Alltoall(4096B)', 'RRTwo-sidedLat(8B)'
-]
-for metric_medium in metric_list:
-for suffix in ['Max', 'Min', 'Avg', '99.9%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
-assert (metric in benchmark.result)
-assert (len(benchmark.result[metric]) == 1)
-assert (isinstance(benchmark.result[metric][0], numbers.Number))
-test_name = 'NetworkTestsrunningwithCongestionTests(RRTwo-sidedBW+SyncNetworkTest)'
-metric_list = [
-'GetBcast(4096B)', 'PutIncast(4096B)', 'Two-sidedIncast(4096B)', 'Alltoall(4096B)',
-'RRTwo-sidedBW+Sync(131072B)'
-]
-for metric_medium in metric_list:
-for suffix in ['Max', 'Min', 'Avg', '99.9%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
-assert (metric in benchmark.result)
-assert (len(benchmark.result[metric]) == 1)
-assert (isinstance(benchmark.result[metric][0], numbers.Number))
-test_name = 'NetworkTestsrunningwithCongestionTests(MultipleAllreduceNetworkTest)'
-metric_list = [
-'GetBcast(4096B)', 'PutIncast(4096B)', 'Two-sidedIncast(4096B)', 'Alltoall(4096B)', 'MultipleAllreduce(8B)'
-]
-for metric_medium in metric_list:
-for suffix in ['Max', 'Min', 'Avg', '99.9%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
-assert (metric in benchmark.result)
-assert (len(benchmark.result[metric]) == 1)
-assert (isinstance(benchmark.result[metric][0], numbers.Number))
-test_name = 'NetworkTestsrunningwithCongestionTests-KeyResults'
-metric_list = ['RRTwo-sidedLat(8B)', 'RRTwo-sidedBW+Sync(131072B)', 'MultipleAllreduce(8B)']
-for metric_medium in metric_list:
-for suffix in ['Avg', '99%']:
-metric = test_name + '_' + metric_medium + '_' + suffix
+for suffix in ['avg', '99%']:
+metric = metric_medium + '_' + suffix
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
@@ -119,8 +119,8 @@ def _test_gpu_copy_bw_performance_result_parsing(self, platform):
else:
assert (len(benchmark.result[output_key]) == 1)
assert (isinstance(benchmark.result[output_key][0], numbers.Number))
-assert (output_key in test_raw_output_dict)
-assert (test_raw_output_dict[output_key] == benchmark.result[output_key][0])
+assert (output_key.strip('_bw') in test_raw_output_dict)
+assert (test_raw_output_dict[output_key.strip('_bw')] == benchmark.result[output_key][0])
# Negative case - invalid raw output.
assert (benchmark._process_raw_result(1, 'Invalid raw output') is False)
@@ -158,7 +158,7 @@ def test_ib_loopback_all_sizes(self, mock_ib_devices, mock_numa_cores, mock_port
metric_list = []
for ib_command in benchmark._args.commands:
for size in ['8388608', '4194304', '1024', '2']:
-metric = 'IB_{}_{}_Avg_{}'.format(ib_command, size, str(benchmark._args.ib_index))
+metric = 'ib_{}_{}_ib{}_bw'.format(ib_command, size, str(benchmark._args.ib_index))
metric_list.append(metric)
for metric in metric_list:
assert (metric in benchmark.result)
@@ -270,7 +270,7 @@ def test_ib_loopback_8M_size(self, mock_ib_devices, mock_numa_cores, mock_port):
# Positive case - valid raw output.
metric_list = []
for ib_command in benchmark._args.commands:
-metric = 'IB_{}_8388608_Avg_{}'.format(ib_command, str(benchmark._args.ib_index))
+metric = 'ib_{}_8388608_ib{}_bw'.format(ib_command, str(benchmark._args.ib_index))
metric_list.append(metric)
for metric in metric_list:
assert (metric in benchmark.result)
@@ -27,6 +27,9 @@ def setUp(self):
def tearDown(self):
"""Method called after the test method has been called and the result recorded."""
self.__binary_file.unlink()
+p = Path('hostfile')
+if p.is_file():
+p.unlink()
def test_generate_config(self): # noqa: C901
"""Test util functions ."""
@@ -126,15 +129,18 @@ def test_ib_traffic_performance(self, mock_ib_devices):
# Check preprocess
# Negative cases
-parameters = '--ib_index 0 --iters 2000 --pattern one-to-one'
+parameters = '--ib_index 0 --iters 2000 --pattern one-to-one --hostfile hostfile'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
mock_ib_devices.return_value = None
ret = benchmark._preprocess()
assert (ret is False)
assert (benchmark.return_code == ReturnCode.MICROBENCHMARK_MPI_INIT_FAILURE)
+hosts = ['node0\n', 'node1\n', 'node2\n', 'node3\n']
+with open('hostfile', 'w') as f:
+f.writelines(hosts)
os.environ['OMPI_COMM_WORLD_SIZE'] = '4'
-parameters = '--ib_index 0 --iters 2000 --pattern one-to-one'
+parameters = '--ib_index 0 --iters 2000 --pattern one-to-one --hostfile hostfile'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
mock_ib_devices.return_value = None
ret = benchmark._preprocess()
@@ -143,21 +149,21 @@ def test_ib_traffic_performance(self, mock_ib_devices):
# Positive cases
os.environ['OMPI_COMM_WORLD_SIZE'] = '3'
-parameters = '--ib_index 0 --iters 2000 --pattern one-to-one'
+parameters = '--ib_index 0 --iters 2000 --pattern one-to-one --hostfile hostfile'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
mock_ib_devices.return_value = ['mlx5_0']
ret = benchmark._preprocess()
assert (ret is True)
# Generate config
-parameters = '--ib_index 0 --iters 2000 --msg_size 33554432'
+parameters = '--ib_index 0 --iters 2000 --msg_size 33554432 --hostfile hostfile'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
os.environ['OMPI_COMM_WORLD_SIZE'] = '4'
mock_ib_devices.return_value = ['mlx5_0']
ret = benchmark._preprocess()
Path('config.txt').unlink()
assert (ret)
-expect_command = 'ib_validation --hostfile /root/hostfile --cmd_prefix "ib_write_bw -F ' + \
+expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \
'--iters=2000 -d mlx5_0 -s 33554432" --input_config ' + os.getcwd() + '/config.txt'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
assert (command == expect_command)
@@ -167,14 +173,14 @@ def test_ib_traffic_performance(self, mock_ib_devices):
with open('test_config.txt', 'w') as f:
for line in config:
f.write(line + '\n')
-parameters = '--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt'
+parameters = '--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt --hostfile hostfile'
benchmark = benchmark_class(benchmark_name, parameters=parameters)
os.environ['OMPI_COMM_WORLD_SIZE'] = '2'
mock_ib_devices.return_value = ['mlx5_0']
ret = benchmark._preprocess()
Path('test_config.txt').unlink()
assert (ret)
-expect_command = 'ib_validation --hostfile /root/hostfile --cmd_prefix "ib_write_bw -F ' + \
+expect_command = 'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F ' + \
'--iters=2000 -d mlx5_0 -s 33554432" --input_config test_config.txt'
command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
@@ -36,7 +36,7 @@ def test_kernel_launch_overhead():
assert ('raw_output_0' in benchmark.raw_data)
assert (len(benchmark.raw_data['raw_output_0']) == 1)
assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
-for metric in ['event_overhead', 'wall_overhead']:
+for metric in ['event_time', 'wall_time']:
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
@@ -35,6 +35,6 @@ def test_pytorch_matmul():
# Check results and metrics.
assert (benchmark.run_count == 2)
assert (benchmark.return_code == ReturnCode.SUCCESS)
-assert (len(benchmark.raw_data['nosharding']) == benchmark.run_count)
-assert (len(benchmark.raw_data['nosharding'][0]) == benchmark._args.num_steps)
-assert (len(benchmark.result['nosharding']) == benchmark.run_count)
+assert (len(benchmark.raw_data['nosharding_time']) == benchmark.run_count)
+assert (len(benchmark.raw_data['nosharding_time'][0]) == benchmark._args.num_steps)
+assert (len(benchmark.result['nosharding_time']) == benchmark.run_count)
@@ -83,7 +83,7 @@ def test_memory_bw_performance_base():
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
-for i, metric in enumerate(['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']):
+for i, metric in enumerate(['h2d_bw', 'd2h_bw', 'd2d_bw']):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
@@ -97,7 +97,7 @@ def test_memory_bw_performance_base():
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
-for i, metric in enumerate(['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']):
+for i, metric in enumerate(['h2d_bw', 'd2h_bw', 'd2d_bw']):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
@@ -92,11 +92,11 @@ def test_rocm_flops_performance(self):
assert (benchmark._process_raw_result(3, raw_output_BF16_X))
assert (benchmark._process_raw_result(4, raw_output_INT8_X))
-assert (benchmark.result['FP64'][0] == 10037.5)
-assert (benchmark.result['FP32_xDLOPS'][0] == 39441.6)
-assert (benchmark.result['FP16_xDLOPS'][0] == 153728)
-assert (benchmark.result['BF16_xDLOPS'][0] == 81374.3)
-assert (benchmark.result['INT8_xDLOPS'][0] == 162675)
+assert (benchmark.result['fp64_flops'][0] == 10037.5)
+assert (benchmark.result['fp32_xdlops_flops'][0] == 39441.6)
+assert (benchmark.result['fp16_xdlops_flops'][0] == 153728)
+assert (benchmark.result['bf16_xdlops_flops'][0] == 81374.3)
+assert (benchmark.result['int8_xdlops_iops'][0] == 162675)
# Negative case - Add invalid raw output.
assert (benchmark._process_raw_result(4, 'Invalid raw output') is False)
@@ -159,11 +159,11 @@ def test_rocm_memory_bw_performance(self):
might occur with a mixture of architectural capabilities.
"""
-for i, metric in enumerate(['htod_524288kB', 'htod_524288kB']):
+for i, metric in enumerate(['h2d_bw', 'd2h_bw']):
assert (benchmark._process_raw_result(i, raw_output[i]))
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number))
-assert (benchmark.result['htod_524288kB'][0] == 24.6708)
-assert (benchmark.result['dtoh_524288kB'][0] == 27.9348)
+assert (benchmark.result['h2d_bw'][0] == 25.2351)
+assert (benchmark.result['d2h_bw'][0] == 27.9348)
@@ -44,7 +44,7 @@ def test_pytorch_sharding_matmul():
# Check results and metrics.
assert (benchmark.run_count == 2)
assert (benchmark.return_code == ReturnCode.SUCCESS)
-for metric in ['allreduce', 'allgather']:
+for metric in ['allreduce_time', 'allgather_time']:
assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
assert (len(benchmark.result[metric]) == benchmark.run_count)
@@ -52,15 +52,15 @@ def test_tcp_connectivity(self):
assert (benchmark.result)
# Check results and metrics.
-assert (benchmark.result['Successed_api.github.com'][0] == 10)
-assert (benchmark.result['Failed_api.github.com'][0] == 0)
-assert (benchmark.result['Success_Rate_api.github.com'][0] == 100.0)
-assert (isinstance(benchmark.result['Minimum_api.github.com'][0], numbers.Number))
-assert (isinstance(benchmark.result['Maximum_api.github.com'][0], numbers.Number))
-assert (isinstance(benchmark.result['Average_api.github.com'][0], numbers.Number))
-assert (isinstance(benchmark.result['Successed_localhost'][0], numbers.Number))
-assert (isinstance(benchmark.result['Failed_localhost'][0], numbers.Number))
-assert (isinstance(benchmark.result['Maximum_localhost'][0], numbers.Number))
-assert (isinstance(benchmark.result['Minimum_localhost'][0], numbers.Number))
-assert (isinstance(benchmark.result['Average_localhost'][0], numbers.Number))
+assert (benchmark.result['api.github.com_successed_count'][0] == 10)
+assert (benchmark.result['api.github.com_failed_count'][0] == 0)
+assert (benchmark.result['api.github.com_success_rate'][0] == 100.0)
+assert (isinstance(benchmark.result['api.github.com_time_min'][0], numbers.Number))
+assert (isinstance(benchmark.result['api.github.com_time_max'][0], numbers.Number))
+assert (isinstance(benchmark.result['api.github.com_time_avg'][0], numbers.Number))
+assert (isinstance(benchmark.result['localhost_successed_count'][0], numbers.Number))
+assert (isinstance(benchmark.result['localhost_failed_count'][0], numbers.Number))
+assert (isinstance(benchmark.result['localhost_time_max'][0], numbers.Number))
+assert (isinstance(benchmark.result['localhost_time_min'][0], numbers.Number))
+assert (isinstance(benchmark.result['localhost_time_avg'][0], numbers.Number))
assert (benchmark.return_code == ReturnCode.SUCCESS)
@@ -135,9 +135,9 @@ def test_tensorrt_inference_result_parsing(self, test_raw_log):
self.assertEqual(6 + benchmark.default_metric_count, len(benchmark.result))
for tag in ['mean', '99']:
-self.assertEqual(0.5, benchmark.result[f'gpu_lat_ms_{tag}'][0])
-self.assertEqual(0.6, benchmark.result[f'host_lat_ms_{tag}'][0])
-self.assertEqual(1.0, benchmark.result[f'end_to_end_lat_ms_{tag}'][0])
+self.assertEqual(0.5, benchmark.result[f'model_0_gpu_time_{tag}'][0])
+self.assertEqual(0.6, benchmark.result[f'model_0_host_time_{tag}'][0])
+self.assertEqual(1.0, benchmark.result[f'model_0_end_to_end_time_{tag}'][0])
# Negative case - invalid raw output
self.assertFalse(benchmark._process_raw_result(1, 'Invalid raw output'))
@@ -223,10 +223,10 @@ def test_train():
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"steptime_train_float32": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "steptime_train_float32": [2.0], "throughput_train_float32": [16000.0]}, '
'"reduce_op": {"steptime_train_float32": "max", "throughput_train_float32": "min"}}'
'"fp32_train_step_time": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"fp32_train_throughput": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "fp32_train_step_time": [2.0], "fp32_train_throughput": [16000.0]}, '
'"reduce_op": {"fp32_train_step_time": "max", "fp32_train_throughput": "min"}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__train(Precision.FLOAT32))
@@ -249,10 +249,11 @@ def test_inference():
expected_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"steptime_inference_float16": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"return_code": [0], "steptime_inference_float16": [4.0], "throughput_inference_float16": '
'[8000.0]}, "reduce_op": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
'"fp16_inference_step_time": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"fp16_inference_throughput": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"return_code": [0], '
'"fp16_inference_step_time": [4.0], "fp16_inference_throughput": [8000.0]}, '
'"reduce_op": {"fp16_inference_step_time": null, "fp16_inference_throughput": null}}'
)
assert (benchmark._preprocess())
assert (benchmark._ModelBenchmark__inference(Precision.FLOAT16))
@@ -280,31 +281,31 @@ def test_benchmark():
assert (benchmark.run_count == 1)
assert (benchmark.return_code == ReturnCode.SUCCESS)
expected_raw_data = {
-'steptime_train_float32': [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]],
-'throughput_train_float32': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]],
-'steptime_train_float16': [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]],
-'throughput_train_float16': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]
+'fp32_train_step_time': [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]],
+'fp32_train_throughput': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]],
+'fp16_train_step_time': [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]],
+'fp16_train_throughput': [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]
}
assert (benchmark.raw_data == expected_raw_data)
expected_result = {
'return_code': [0],
-'steptime_train_float32': [2.0],
-'throughput_train_float32': [16000.0],
-'steptime_train_float16': [2.0],
-'throughput_train_float16': [16000.0]
+'fp32_train_step_time': [2.0],
+'fp32_train_throughput': [16000.0],
+'fp16_train_step_time': [2.0],
+'fp16_train_throughput': [16000.0]
}
assert (benchmark.result == expected_result)
expected_serialized_result = (
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'"end_time": null, "raw_data": {"steptime_train_float32": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"throughput_train_float32": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"steptime_train_float16": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"throughput_train_float16": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "steptime_train_float32": [2.0], "throughput_train_float32": [16000.0], '
'"steptime_train_float16": [2.0], "throughput_train_float16": [16000.0]}, '
'"reduce_op": {"steptime_train_float32": "max", "throughput_train_float32": "min", '
'"steptime_train_float16": "max", "throughput_train_float16": "min"}}'
'"end_time": null, "raw_data": {"fp32_train_step_time": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"fp32_train_throughput": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"fp16_train_step_time": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"fp16_train_throughput": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "fp32_train_step_time": [2.0], "fp32_train_throughput": [16000.0], '
'"fp16_train_step_time": [2.0], "fp16_train_throughput": [16000.0]}, '
'"reduce_op": {"fp32_train_step_time": "max", "fp32_train_throughput": "min", '
'"fp16_train_step_time": "max", "fp16_train_throughput": "min"}}'
)
assert (benchmark.serialized_result == expected_serialized_result)
@@ -188,8 +188,7 @@ def test_pytorch_base():
# Test results.
for metric in [
-'steptime_train_float32', 'steptime_inference_float32', 'throughput_train_float32',
-'throughput_inference_float32'
+'fp32_train_step_time', 'fp32_inference_step_time', 'fp32_train_throughput', 'fp32_inference_throughput'
]:
assert (len(benchmark.raw_data[metric]) == 1)
assert (len(benchmark.raw_data[metric][0]) == 64)