Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
9f56b219
Unverified
Commit
9f56b219
authored
Dec 09, 2021
by
Yuting Jiang
Committed by
GitHub
Dec 09, 2021
Browse files
Benchmarks: Unify metric names of benchmarks (#252)
**Description** Unify metric names of benchmarks.
parent
c13ed2a2
Changes
44
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
144 additions
and
178 deletions
+144
-178
superbench/benchmarks/model_benchmarks/model_base.py
superbench/benchmarks/model_benchmarks/model_base.py
+5
-2
tests/benchmarks/docker_benchmarks/test_rocm_onnxruntime_performance.py
...ks/docker_benchmarks/test_rocm_onnxruntime_performance.py
+10
-10
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
...arks/micro_benchmarks/test_cuda_gemm_flops_performance.py
+12
-12
tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py
...marks/micro_benchmarks/test_cuda_memory_bw_performance.py
+1
-1
tests/benchmarks/micro_benchmarks/test_disk_performance.py
tests/benchmarks/micro_benchmarks/test_disk_performance.py
+13
-13
tests/benchmarks/micro_benchmarks/test_gemm_flops_performance_base.py
...arks/micro_benchmarks/test_gemm_flops_performance_base.py
+18
-18
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
+13
-56
tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
...nchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
+2
-2
tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
...nchmarks/micro_benchmarks/test_ib_loopback_performance.py
+2
-2
tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py
...enchmarks/micro_benchmarks/test_ib_traffic_performance.py
+13
-7
tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
...enchmarks/micro_benchmarks/test_kernel_launch_overhead.py
+1
-1
tests/benchmarks/micro_benchmarks/test_matmul.py
tests/benchmarks/micro_benchmarks/test_matmul.py
+3
-3
tests/benchmarks/micro_benchmarks/test_memory_bw_performance_base.py
...marks/micro_benchmarks/test_memory_bw_performance_base.py
+2
-2
tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py
...arks/micro_benchmarks/test_rocm_gemm_flops_performance.py
+5
-5
tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py
...marks/micro_benchmarks/test_rocm_memory_bw_performance.py
+3
-3
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
+1
-1
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
+11
-11
tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py
...s/micro_benchmarks/test_tensorrt_inference_performance.py
+3
-3
tests/benchmarks/model_benchmarks/test_model_base.py
tests/benchmarks/model_benchmarks/test_model_base.py
+25
-24
tests/benchmarks/model_benchmarks/test_pytorch_base.py
tests/benchmarks/model_benchmarks/test_pytorch_base.py
+1
-2
No files found.
superbench/benchmarks/model_benchmarks/model_base.py
View file @
9f56b219
...
...
@@ -373,7 +373,10 @@ class ModelBenchmark(Benchmark):
)
return
False
metric
=
'steptime_{}_{}'
.
format
(
model_action
,
precision
)
precision_metric
=
{
'float16'
:
'fp16'
,
'float32'
:
'fp32'
,
'float64'
:
'fp64'
,
'bfloat16'
:
'bf16'
}
if
precision
.
value
in
precision_metric
.
keys
():
precision
=
precision_metric
[
precision
.
value
]
metric
=
'{}_{}_step_time'
.
format
(
precision
,
model_action
)
self
.
_result
.
add_raw_data
(
metric
,
step_times
)
avg
=
statistics
.
mean
(
step_times
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MAX
if
model_action
is
ModelAction
.
TRAIN
else
None
)
...
...
@@ -381,7 +384,7 @@ class ModelBenchmark(Benchmark):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second
=
1000
throughput
=
[
millisecond_per_second
/
step_time
*
self
.
_args
.
batch_size
for
step_time
in
step_times
]
metric
=
'throughput
_{}_{}
'
.
format
(
model_action
,
precis
ion
)
metric
=
'
{}_{}_
throughput'
.
format
(
precision
,
model_act
ion
)
self
.
_result
.
add_raw_data
(
metric
,
throughput
)
avg
=
statistics
.
mean
(
throughput
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MIN
if
model_action
is
ModelAction
.
TRAIN
else
None
)
...
...
tests/benchmarks/docker_benchmarks/test_rocm_onnxruntime_performance.py
View file @
9f56b219
...
...
@@ -44,13 +44,13 @@ __superbench__ begin roberta-large ngpu=8
"samples_per_second": 274.455
"""
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_1'
][
0
]
==
21.829
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_8'
][
0
]
==
147.181
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_1'
][
0
]
==
126.827
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_8'
][
0
]
==
966.796
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_1'
][
0
]
==
20.46
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_8'
][
0
]
==
151.089
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_1'
][
0
]
==
66.171
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_8'
][
0
]
==
370.343
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_1'
][
0
]
==
37.103
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_8'
][
0
]
==
274.455
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_1
_throughput
'
][
0
]
==
21.829
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_8
_throughput
'
][
0
]
==
147.181
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_1
_throughput
'
][
0
]
==
126.827
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_8
_throughput
'
][
0
]
==
966.796
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_1
_throughput
'
][
0
]
==
20.46
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_8
_throughput
'
][
0
]
==
151.089
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_1
_throughput
'
][
0
]
==
66.171
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_8
_throughput
'
][
0
]
==
370.343
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_1
_throughput
'
][
0
]
==
37.103
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_8
_throughput
'
][
0
]
==
274.455
)
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
View file @
9f56b219
...
...
@@ -38,7 +38,7 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
# Negative case - MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE.
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
'--num_warmup 200 --n 1024 --k 512 --m 2048 --precision
FP
32
TF
32_
TC FP16_TC INT8_TC
'
parameters
=
'--num_warmup 200 --n 1024 --k 512 --m 2048 --precision
fp
32
tf
32_
tc fp16_tc int8_tc
'
)
ret
=
benchmark
.
_preprocess
()
...
...
@@ -59,11 +59,11 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
assert
(
benchmark
.
_args
.
n
==
1024
)
assert
(
benchmark
.
_args
.
k
==
512
)
assert
(
benchmark
.
_args
.
m
==
2048
)
assert
(
benchmark
.
_args
.
precision
==
[
'
FP
32'
,
'
TF
32_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC
'
])
benchmark
.
_CudaGemmFlopsBenchmark__precision_need_to_run
=
[
'
FP
32'
,
'
TF
32_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC
'
]
assert
(
benchmark
.
_args
.
precision
==
[
'
fp
32'
,
'
tf
32_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc
'
])
benchmark
.
_CudaGemmFlopsBenchmark__precision_need_to_run
=
[
'
fp
32'
,
'
tf
32_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc
'
]
# Check results and metrics.
raw_output_
FP
32
=
"""
raw_output_
fp
32
=
"""
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
...
@@ -72,7 +72,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tn_align1,passed,success,universal,16384,16384,16384,f32:row,f32:column,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,482.034,6.22363,18249
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tt_align1,passed,success,universal,16384,16384,16384,f32:row,f32:row,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,481.838,6.22616,18256.4
"""
raw_output_
TF
32_
TC
=
"""
raw_output_
tf
32_
tc
=
"""
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
...
@@ -81,7 +81,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tn_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:column,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,86.5167,34.6754,101676
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tt_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:row,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,68.3621,43.884,128677
"""
raw_output_
FP
16_
TC
=
"""
raw_output_
fp
16_
tc
=
"""
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
...
@@ -90,13 +90,13 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tn_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:column,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,39.0413,38.4209,225316
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tt_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:row,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,31.2994,47.9243,281048
"""
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output_
FP
32
))
assert
(
benchmark
.
_process_raw_result
(
1
,
raw_output_
TF
32_
TC
))
assert
(
benchmark
.
_process_raw_result
(
2
,
raw_output_
FP
16_
TC
))
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output_
fp
32
))
assert
(
benchmark
.
_process_raw_result
(
1
,
raw_output_
tf
32_
tc
))
assert
(
benchmark
.
_process_raw_result
(
2
,
raw_output_
fp
16_
tc
))
assert
(
benchmark
.
result
[
'
FP32
'
][
0
]
==
18369.7
)
assert
(
benchmark
.
result
[
'
TF
32_
TC
'
][
0
]
==
128677
)
assert
(
benchmark
.
result
[
'
FP
16_
TC
'
][
0
]
==
281048
)
assert
(
benchmark
.
result
[
'
fp32_flops
'
][
0
]
==
18369.7
)
assert
(
benchmark
.
result
[
'
tf
32_
tc_flops
'
][
0
]
==
128677
)
assert
(
benchmark
.
result
[
'
fp
16_
tc_flops
'
][
0
]
==
281048
)
# Negative case - Add invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
3
,
'Invalid raw output'
)
is
False
)
tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py
View file @
9f56b219
...
...
@@ -328,7 +328,7 @@ bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 byt
bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
"""
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
...
tests/benchmarks/micro_benchmarks/test_disk_performance.py
View file @
9f56b219
...
...
@@ -519,19 +519,19 @@ class DiskBenchmarkTest(unittest.TestCase):
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
]))
assert
(
85066.128925
==
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0
00000
'
]))
assert
(
1941504
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0
00000
'
]))
assert
(
2244608
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9
00000
'
]))
assert
(
3620864
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0
00000
'
]))
assert
(
1908736
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0
00000
'
]))
assert
(
2072576
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9
00000
'
]))
assert
(
2605056
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9
00000
'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0'
]))
assert
(
1941504
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0'
]))
assert
(
2244608
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9'
]))
assert
(
3620864
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0'
]))
assert
(
1908736
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0'
]))
assert
(
2072576
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9'
]))
assert
(
2605056
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9'
][
0
])
# Negative case - invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
...
...
tests/benchmarks/micro_benchmarks/test_gemm_flops_performance_base.py
View file @
9f56b219
...
...
@@ -72,7 +72,7 @@ class FakeGemmFlopsBenchmark(GemmFlopsBenchmark):
return
True
def
test_
m
em
ory_bw
_performance_base
():
def
test_
g
em
m_flops
_performance_base
():
"""Test GemmFlopsBenchmark."""
# Positive case - memory=pinned.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
)
...
...
@@ -81,49 +81,49 @@ def test_memory_bw_performance_base():
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
# Check command list
expected_command
=
[
'echo "--precision
FP
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
64_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
TF
32_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
BF
16_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
INT8_TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
INT4_TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision
fp
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
64_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
tf
32_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
bf
16_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
16_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
int8_tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
int4_tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
(
[
'
FP
64'
,
'
FP
32'
,
'
FP
16'
,
'
FP
64_
TC
'
,
'
TF
32_
TC
'
,
'
BF
16_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC'
,
'INT4_TC
'
]
[
'
fp
64'
,
'
fp
32'
,
'
fp
16'
,
'
fp
64_
tc
'
,
'
tf
32_
tc
'
,
'
bf
16_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc'
,
'int4_tc
'
]
):
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
# Positive case - memory=unpinned.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
FP
64
FP
32
FP
16'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
fp
64
fp
32
fp
16'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
())
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
# Check command list
expected_command
=
[
'echo "--precision
FP
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision
fp
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
FP
64'
,
'
FP
32'
,
'
FP
16'
]):
for
i
,
metric
in
enumerate
([
'
fp
64'
,
'
fp
32'
,
'
fp
16'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
FP
64
BF
64'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
fp
64
bf
64'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
()
is
True
)
# Negative case - INVALID_ARGUMENT.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
BF
64'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
bf
64'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
()
is
False
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
NO_SUPPORTED_PRECISION
)
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
View file @
9f56b219
...
...
@@ -98,14 +98,19 @@ the job to be terminated. The first process to do so was:
# Check function process_raw_data.
# Positive case - valid raw output.
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
test_name
=
'IsolatedNetworkTests'
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRGetLat(8B)'
,
'RRTwo-sidedBW(131072B)'
,
'RRPutBW(131072B)'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'NatTwo-sidedBW(131072B)'
,
'MultipleAllreduce(8B)'
,
'MultipleAlltoall(4096B)'
'rr_two-sided_lat'
,
'rr_get_lat'
,
'rr_two-sided_bw'
,
'rr_put_bw'
,
'rr_two-sided+sync_bw'
,
'nat_two-sided_bw'
,
'multiple_allreduce_time'
,
'multiple_alltoall_bw'
,
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'
A
vg'
,
'99%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
for
suffix
in
[
'
a
vg'
,
'99%'
]:
metric
=
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
...
...
@@ -253,58 +258,10 @@ the job to be terminated. The first process to do so was:
assert
(
len
(
benchmark
.
result
)
==
benchmark
.
default_metric_count
)
# Positive case - valid raw output.
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
test_name
=
'IsolatedNetworkTests'
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'MultipleAllreduce(8B)'
]
metric_list
=
[
'rr_two-sided_lat_x'
,
'rr_two-sided+sync_bw_x'
,
'multiple_allreduce_x'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'IsolatedCongestionTests'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(RRTwo-sidedLatNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'RRTwo-sidedLat(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(RRTwo-sidedBW+SyncNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'RRTwo-sidedBW+Sync(131072B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(MultipleAllreduceNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'MultipleAllreduce(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests-KeyResults'
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'MultipleAllreduce(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Avg'
,
'99%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
for
suffix
in
[
'avg'
,
'99%'
]:
metric
=
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
...
...
tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
View file @
9f56b219
...
...
@@ -119,8 +119,8 @@ gpu0_to_gpu0_by_gpu0_using_dma_under_numa1 634.203
else
:
assert
(
len
(
benchmark
.
result
[
output_key
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
output_key
][
0
],
numbers
.
Number
))
assert
(
output_key
in
test_raw_output_dict
)
assert
(
test_raw_output_dict
[
output_key
]
==
benchmark
.
result
[
output_key
][
0
])
assert
(
output_key
.
strip
(
'_bw'
)
in
test_raw_output_dict
)
assert
(
test_raw_output_dict
[
output_key
.
strip
(
'_bw'
)
]
==
benchmark
.
result
[
output_key
][
0
])
# Negative case - invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
...
...
tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
View file @
9f56b219
...
...
@@ -158,7 +158,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
metric_list
=
[]
for
ib_command
in
benchmark
.
_args
.
commands
:
for
size
in
[
'8388608'
,
'4194304'
,
'1024'
,
'2'
]:
metric
=
'
IB
_{}_{}_
Avg_{}
'
.
format
(
ib_command
,
size
,
str
(
benchmark
.
_args
.
ib_index
))
metric
=
'
ib
_{}_{}_
ib{}_bw
'
.
format
(
ib_command
,
size
,
str
(
benchmark
.
_args
.
ib_index
))
metric_list
.
append
(
metric
)
for
metric
in
metric_list
:
assert
(
metric
in
benchmark
.
result
)
...
...
@@ -270,7 +270,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
# Positive case - valid raw output.
metric_list
=
[]
for
ib_command
in
benchmark
.
_args
.
commands
:
metric
=
'
IB
_{}_8388608_
Avg_{}
'
.
format
(
ib_command
,
str
(
benchmark
.
_args
.
ib_index
))
metric
=
'
ib
_{}_8388608_
ib{}_bw
'
.
format
(
ib_command
,
str
(
benchmark
.
_args
.
ib_index
))
metric_list
.
append
(
metric
)
for
metric
in
metric_list
:
assert
(
metric
in
benchmark
.
result
)
...
...
tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py
View file @
9f56b219
...
...
@@ -27,6 +27,9 @@ class IBBenchmarkTest(unittest.TestCase):
def
tearDown
(
self
):
"""Method called after the test method has been called and the result recorded."""
self
.
__binary_file
.
unlink
()
p
=
Path
(
'hostfile'
)
if
p
.
is_file
():
p
.
unlink
()
def
test_generate_config
(
self
):
# noqa: C901
"""Test util functions ."""
...
...
@@ -126,15 +129,18 @@ class IBBenchmarkTest(unittest.TestCase):
# Check preprocess
# Negative cases
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
None
ret
=
benchmark
.
_preprocess
()
assert
(
ret
is
False
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
MICROBENCHMARK_MPI_INIT_FAILURE
)
hosts
=
[
'node0
\n
'
,
'node1
\n
'
,
'node2
\n
'
,
'node3
\n
'
]
with
open
(
'hostfile'
,
'w'
)
as
f
:
f
.
writelines
(
hosts
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
None
ret
=
benchmark
.
_preprocess
()
...
...
@@ -143,21 +149,21 @@ class IBBenchmarkTest(unittest.TestCase):
# Positive cases
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'3'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
assert
(
ret
is
True
)
# Generate config
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432'
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
Path
(
'config.txt'
).
unlink
()
assert
(
ret
)
expect_command
=
'ib_validation --hostfile
/root/
hostfile --cmd_prefix "ib_write_bw -F '
+
\
expect_command
=
'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F '
+
\
'--iters=2000 -d mlx5_0 -s 33554432" --input_config '
+
os
.
getcwd
()
+
'/config.txt'
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expect_command
)
...
...
@@ -167,14 +173,14 @@ class IBBenchmarkTest(unittest.TestCase):
with
open
(
'test_config.txt'
,
'w'
)
as
f
:
for
line
in
config
:
f
.
write
(
line
+
'
\n
'
)
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt'
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'2'
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
Path
(
'test_config.txt'
).
unlink
()
assert
(
ret
)
expect_command
=
'ib_validation --hostfile
/root/
hostfile --cmd_prefix "ib_write_bw -F '
+
\
expect_command
=
'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F '
+
\
'--iters=2000 -d mlx5_0 -s 33554432" --input_config test_config.txt'
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
...
...
tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
View file @
9f56b219
...
...
@@ -36,7 +36,7 @@ def test_kernel_launch_overhead():
assert
(
'raw_output_0'
in
benchmark
.
raw_data
)
assert
(
len
(
benchmark
.
raw_data
[
'raw_output_0'
])
==
1
)
assert
(
isinstance
(
benchmark
.
raw_data
[
'raw_output_0'
][
0
],
str
))
for
metric
in
[
'event_
overhead
'
,
'wall_
overhead
'
]:
for
metric
in
[
'event_
time
'
,
'wall_
time
'
]:
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
tests/benchmarks/micro_benchmarks/test_matmul.py
View file @
9f56b219
...
...
@@ -35,6 +35,6 @@ def test_pytorch_matmul():
# Check results and metrics.
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding'
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
result
[
'nosharding'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding
_time
'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding
_time
'
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
result
[
'nosharding
_time
'
])
==
benchmark
.
run_count
)
tests/benchmarks/micro_benchmarks/test_memory_bw_performance_base.py
View file @
9f56b219
...
...
@@ -83,7 +83,7 @@ def test_memory_bw_performance_base():
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
...
@@ -97,7 +97,7 @@ def test_memory_bw_performance_base():
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
...
tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py
View file @
9f56b219
...
...
@@ -92,11 +92,11 @@ T,N,7680,8192,8192,1,8416,0,8416,8416,8416,1, 162675, 6336.5
assert
(
benchmark
.
_process_raw_result
(
3
,
raw_output_BF16_X
))
assert
(
benchmark
.
_process_raw_result
(
4
,
raw_output_INT8_X
))
assert
(
benchmark
.
result
[
'
FP64
'
][
0
]
==
10037.5
)
assert
(
benchmark
.
result
[
'
FP
32_x
DLOPS
'
][
0
]
==
39441.6
)
assert
(
benchmark
.
result
[
'
FP
16_x
DLOPS
'
][
0
]
==
153728
)
assert
(
benchmark
.
result
[
'
BF
16_x
DLOPS
'
][
0
]
==
81374.3
)
assert
(
benchmark
.
result
[
'
INT8_xDLOPS
'
][
0
]
==
162675
)
assert
(
benchmark
.
result
[
'
fp64_flops
'
][
0
]
==
10037.5
)
assert
(
benchmark
.
result
[
'
fp
32_x
dlops_flops
'
][
0
]
==
39441.6
)
assert
(
benchmark
.
result
[
'
fp
16_x
dlops_flops
'
][
0
]
==
153728
)
assert
(
benchmark
.
result
[
'
bf
16_x
dlops_flops
'
][
0
]
==
81374.3
)
assert
(
benchmark
.
result
[
'
int8_xdlops_iops
'
][
0
]
==
162675
)
# Negative case - Add invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
4
,
'Invalid raw output'
)
is
False
)
tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py
View file @
9f56b219
...
...
@@ -159,11 +159,11 @@ Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
"""
for
i
,
metric
in
enumerate
([
'h
tod_524288kB'
,
'htod_524288kB
'
]):
for
i
,
metric
in
enumerate
([
'h
2d_bw'
,
'd2h_bw
'
]):
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
benchmark
.
result
[
'h
tod_524288kB
'
][
0
]
==
2
4.6708
)
assert
(
benchmark
.
result
[
'd
toh_524288kB
'
][
0
]
==
27.9348
)
assert
(
benchmark
.
result
[
'h
2d_bw
'
][
0
]
==
2
5.2351
)
assert
(
benchmark
.
result
[
'd
2h_bw
'
][
0
]
==
27.9348
)
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
View file @
9f56b219
...
...
@@ -44,7 +44,7 @@ def test_pytorch_sharding_matmul():
# Check results and metrics.
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
for
metric
in
[
'allreduce'
,
'allgather'
]:
for
metric
in
[
'allreduce
_time
'
,
'allgather
_time
'
]:
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
benchmark
.
run_count
)
...
...
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
View file @
9f56b219
...
...
@@ -52,15 +52,15 @@ class TCPConnectivityBenchmarkTest(unittest.TestCase):
assert
(
benchmark
.
result
)
# Check results and metrics.
assert
(
benchmark
.
result
[
'
Successed_
api.github.com'
][
0
]
==
10
)
assert
(
benchmark
.
result
[
'
Failed_
api.github.com'
][
0
]
==
0
)
assert
(
benchmark
.
result
[
'
Success_Rate_
api.github.com'
][
0
]
==
100.0
)
assert
(
isinstance
(
benchmark
.
result
[
'
Minimum_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Maximum_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Average_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
S
uccessed_
localhos
t'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Failed_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Maximum_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Minimum_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Average_
localhost'
][
0
],
numbers
.
Number
))
assert
(
benchmark
.
result
[
'api.github.com
_successed_count
'
][
0
]
==
10
)
assert
(
benchmark
.
result
[
'api.github.com
_failed_count
'
][
0
]
==
0
)
assert
(
benchmark
.
result
[
'api.github.com
_success_rate
'
][
0
]
==
100.0
)
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_min
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_max
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_avg
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
localhost_s
uccessed_
coun
t'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_failed_count
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_max
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_min
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_avg
'
][
0
],
numbers
.
Number
))
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py
View file @
9f56b219
...
...
@@ -135,9 +135,9 @@ class TensorRTInferenceBenchmarkTestCase(unittest.TestCase):
self
.
assertEqual
(
6
+
benchmark
.
default_metric_count
,
len
(
benchmark
.
result
))
for
tag
in
[
'mean'
,
'99'
]:
self
.
assertEqual
(
0.5
,
benchmark
.
result
[
f
'
gpu_lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.6
,
benchmark
.
result
[
f
'
host_lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
1.0
,
benchmark
.
result
[
f
'end_to_end_
lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.5
,
benchmark
.
result
[
f
'
model_0_gpu_time
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.6
,
benchmark
.
result
[
f
'
model_0_host_time
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
1.0
,
benchmark
.
result
[
f
'
model_0_
end_to_end_
time
_
{
tag
}
'
][
0
])
# Negative case - invalid raw output
self
.
assertFalse
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
))
tests/benchmarks/model_benchmarks/test_model_base.py
View file @
9f56b219
...
...
@@ -223,10 +223,10 @@ def test_train():
expected_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"
steptime_train_float32
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float32
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
steptime_train_float32": [2.0], "throughput_train_float32
": [16000.0]}, '
'"reduce_op": {"
steptime_train_float32": "max", "throughput_train_float32
": "min"}}'
'"
fp32_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
fp32_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
fp32_train_step_time": [2.0], "fp32_train_throughput
": [16000.0]}, '
'"reduce_op": {"
fp32_train_step_time": "max", "fp32_train_throughput
": "min"}}'
)
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_ModelBenchmark__train
(
Precision
.
FLOAT32
))
...
...
@@ -249,10 +249,11 @@ def test_inference():
expected_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"steptime_inference_float16": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"return_code": [0], "steptime_inference_float16": [4.0], "throughput_inference_float16": '
'[8000.0]}, "reduce_op": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
'"fp16_inference_step_time": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"fp16_inference_throughput": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"return_code": [0], '
'"fp16_inference_step_time": [4.0], "fp16_inference_throughput": [8000.0]}, '
'"reduce_op": {"fp16_inference_step_time": null, "fp16_inference_throughput": null}}'
)
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_ModelBenchmark__inference
(
Precision
.
FLOAT16
))
...
...
@@ -280,31 +281,31 @@ def test_benchmark():
assert
(
benchmark
.
run_count
==
1
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
expected_raw_data
=
{
'
steptime_train_float32
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
throughput_train_float32
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]],
'
steptime_train_float16
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
throughput_train_float16
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]]
'
fp32_train_step_time
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
fp32_train_throughput
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]],
'
fp16_train_step_time
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
fp16_train_throughput
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]]
}
assert
(
benchmark
.
raw_data
==
expected_raw_data
)
expected_result
=
{
'return_code'
:
[
0
],
'
steptime_train_float32
'
:
[
2.0
],
'
throughput_train_float32
'
:
[
16000.0
],
'
steptime_train_float16
'
:
[
2.0
],
'
throughput_train_float16
'
:
[
16000.0
]
'
fp32_train_step_time
'
:
[
2.0
],
'
fp32_train_throughput
'
:
[
16000.0
],
'
fp16_train_step_time
'
:
[
2.0
],
'
fp16_train_throughput
'
:
[
16000.0
]
}
assert
(
benchmark
.
result
==
expected_result
)
expected_serialized_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'"end_time": null, "raw_data": {"
steptime_train_float32
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float32
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"
steptime_train_float16
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float16
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
steptime_train_float32": [2.0], "throughput_train_float32
": [16000.0], '
'"
steptime_train_float16": [2.0], "throughput_train_float16
": [16000.0]}, '
'"reduce_op": {"
steptime_train_float32": "max", "throughput_train_float32
": "min", '
'"
steptime_train_float16": "max", "throughput_train_float16
": "min"}}'
'"end_time": null, "raw_data": {"
fp32_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
fp32_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"
fp16_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
fp16_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
fp32_train_step_time": [2.0], "fp32_train_throughput
": [16000.0], '
'"
fp16_train_step_time": [2.0], "fp16_train_throughput
": [16000.0]}, '
'"reduce_op": {"
fp32_train_step_time": "max", "fp32_train_throughput
": "min", '
'"
fp16_train_step_time": "max", "fp16_train_throughput
": "min"}}'
)
assert
(
benchmark
.
serialized_result
==
expected_serialized_result
)
...
...
tests/benchmarks/model_benchmarks/test_pytorch_base.py
View file @
9f56b219
...
...
@@ -188,8 +188,7 @@ def test_pytorch_base():
# Test results.
for
metric
in
[
'steptime_train_float32'
,
'steptime_inference_float32'
,
'throughput_train_float32'
,
'throughput_inference_float32'
'fp32_train_step_time'
,
'fp32_inference_step_time'
,
'fp32_train_throughput'
,
'fp32_inference_throughput'
]:
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
64
)
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment