Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
9f56b219
Unverified
Commit
9f56b219
authored
Dec 09, 2021
by
Yuting Jiang
Committed by
GitHub
Dec 09, 2021
Browse files
Benchmarks: Unify metric names of benchmarks (#252)
**Description** Unify metric names of benchmarks.
parent
c13ed2a2
Changes
44
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
144 additions
and
178 deletions
+144
-178
superbench/benchmarks/model_benchmarks/model_base.py
superbench/benchmarks/model_benchmarks/model_base.py
+5
-2
tests/benchmarks/docker_benchmarks/test_rocm_onnxruntime_performance.py
...ks/docker_benchmarks/test_rocm_onnxruntime_performance.py
+10
-10
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
...arks/micro_benchmarks/test_cuda_gemm_flops_performance.py
+12
-12
tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py
...marks/micro_benchmarks/test_cuda_memory_bw_performance.py
+1
-1
tests/benchmarks/micro_benchmarks/test_disk_performance.py
tests/benchmarks/micro_benchmarks/test_disk_performance.py
+13
-13
tests/benchmarks/micro_benchmarks/test_gemm_flops_performance_base.py
...arks/micro_benchmarks/test_gemm_flops_performance_base.py
+18
-18
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
+13
-56
tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
...nchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
+2
-2
tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
...nchmarks/micro_benchmarks/test_ib_loopback_performance.py
+2
-2
tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py
...enchmarks/micro_benchmarks/test_ib_traffic_performance.py
+13
-7
tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
...enchmarks/micro_benchmarks/test_kernel_launch_overhead.py
+1
-1
tests/benchmarks/micro_benchmarks/test_matmul.py
tests/benchmarks/micro_benchmarks/test_matmul.py
+3
-3
tests/benchmarks/micro_benchmarks/test_memory_bw_performance_base.py
...marks/micro_benchmarks/test_memory_bw_performance_base.py
+2
-2
tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py
...arks/micro_benchmarks/test_rocm_gemm_flops_performance.py
+5
-5
tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py
...marks/micro_benchmarks/test_rocm_memory_bw_performance.py
+3
-3
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
+1
-1
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
+11
-11
tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py
...s/micro_benchmarks/test_tensorrt_inference_performance.py
+3
-3
tests/benchmarks/model_benchmarks/test_model_base.py
tests/benchmarks/model_benchmarks/test_model_base.py
+25
-24
tests/benchmarks/model_benchmarks/test_pytorch_base.py
tests/benchmarks/model_benchmarks/test_pytorch_base.py
+1
-2
No files found.
superbench/benchmarks/model_benchmarks/model_base.py
View file @
9f56b219
...
@@ -373,7 +373,10 @@ class ModelBenchmark(Benchmark):
...
@@ -373,7 +373,10 @@ class ModelBenchmark(Benchmark):
)
)
return
False
return
False
metric
=
'steptime_{}_{}'
.
format
(
model_action
,
precision
)
precision_metric
=
{
'float16'
:
'fp16'
,
'float32'
:
'fp32'
,
'float64'
:
'fp64'
,
'bfloat16'
:
'bf16'
}
if
precision
.
value
in
precision_metric
.
keys
():
precision
=
precision_metric
[
precision
.
value
]
metric
=
'{}_{}_step_time'
.
format
(
precision
,
model_action
)
self
.
_result
.
add_raw_data
(
metric
,
step_times
)
self
.
_result
.
add_raw_data
(
metric
,
step_times
)
avg
=
statistics
.
mean
(
step_times
)
avg
=
statistics
.
mean
(
step_times
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MAX
if
model_action
is
ModelAction
.
TRAIN
else
None
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MAX
if
model_action
is
ModelAction
.
TRAIN
else
None
)
...
@@ -381,7 +384,7 @@ class ModelBenchmark(Benchmark):
...
@@ -381,7 +384,7 @@ class ModelBenchmark(Benchmark):
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
# The unit of step time is millisecond, use it to calculate the throughput with the unit samples/sec.
millisecond_per_second
=
1000
millisecond_per_second
=
1000
throughput
=
[
millisecond_per_second
/
step_time
*
self
.
_args
.
batch_size
for
step_time
in
step_times
]
throughput
=
[
millisecond_per_second
/
step_time
*
self
.
_args
.
batch_size
for
step_time
in
step_times
]
metric
=
'throughput
_{}_{}
'
.
format
(
model_action
,
precis
ion
)
metric
=
'
{}_{}_
throughput'
.
format
(
precision
,
model_act
ion
)
self
.
_result
.
add_raw_data
(
metric
,
throughput
)
self
.
_result
.
add_raw_data
(
metric
,
throughput
)
avg
=
statistics
.
mean
(
throughput
)
avg
=
statistics
.
mean
(
throughput
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MIN
if
model_action
is
ModelAction
.
TRAIN
else
None
)
self
.
_result
.
add_result
(
metric
,
avg
,
reduce_type
=
ReduceType
.
MIN
if
model_action
is
ModelAction
.
TRAIN
else
None
)
...
...
tests/benchmarks/docker_benchmarks/test_rocm_onnxruntime_performance.py
View file @
9f56b219
...
@@ -44,13 +44,13 @@ __superbench__ begin roberta-large ngpu=8
...
@@ -44,13 +44,13 @@ __superbench__ begin roberta-large ngpu=8
"samples_per_second": 274.455
"samples_per_second": 274.455
"""
"""
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_1'
][
0
]
==
21.829
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_1
_throughput
'
][
0
]
==
21.829
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_8'
][
0
]
==
147.181
)
assert
(
benchmark
.
result
[
'bert_large_uncased_ngpu_8
_throughput
'
][
0
]
==
147.181
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_1'
][
0
]
==
126.827
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_1
_throughput
'
][
0
]
==
126.827
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_8'
][
0
]
==
966.796
)
assert
(
benchmark
.
result
[
'distilbert_base_uncased_ngpu_8
_throughput
'
][
0
]
==
966.796
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_1'
][
0
]
==
20.46
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_1
_throughput
'
][
0
]
==
20.46
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_8'
][
0
]
==
151.089
)
assert
(
benchmark
.
result
[
'gpt2_ngpu_8
_throughput
'
][
0
]
==
151.089
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_1'
][
0
]
==
66.171
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_1
_throughput
'
][
0
]
==
66.171
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_8'
][
0
]
==
370.343
)
assert
(
benchmark
.
result
[
'facebook_bart_large_ngpu_8
_throughput
'
][
0
]
==
370.343
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_1'
][
0
]
==
37.103
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_1
_throughput
'
][
0
]
==
37.103
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_8'
][
0
]
==
274.455
)
assert
(
benchmark
.
result
[
'roberta_large_ngpu_8
_throughput
'
][
0
]
==
274.455
)
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
View file @
9f56b219
...
@@ -38,7 +38,7 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
...
@@ -38,7 +38,7 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
# Negative case - MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE.
# Negative case - MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE.
benchmark
=
benchmark_class
(
benchmark
=
benchmark_class
(
benchmark_name
,
benchmark_name
,
parameters
=
'--num_warmup 200 --n 1024 --k 512 --m 2048 --precision
FP
32
TF
32_
TC FP16_TC INT8_TC
'
parameters
=
'--num_warmup 200 --n 1024 --k 512 --m 2048 --precision
fp
32
tf
32_
tc fp16_tc int8_tc
'
)
)
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
...
@@ -59,11 +59,11 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
...
@@ -59,11 +59,11 @@ class CudaGemmFlopsBenchmarkTest(unittest.TestCase):
assert
(
benchmark
.
_args
.
n
==
1024
)
assert
(
benchmark
.
_args
.
n
==
1024
)
assert
(
benchmark
.
_args
.
k
==
512
)
assert
(
benchmark
.
_args
.
k
==
512
)
assert
(
benchmark
.
_args
.
m
==
2048
)
assert
(
benchmark
.
_args
.
m
==
2048
)
assert
(
benchmark
.
_args
.
precision
==
[
'
FP
32'
,
'
TF
32_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC
'
])
assert
(
benchmark
.
_args
.
precision
==
[
'
fp
32'
,
'
tf
32_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc
'
])
benchmark
.
_CudaGemmFlopsBenchmark__precision_need_to_run
=
[
'
FP
32'
,
'
TF
32_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC
'
]
benchmark
.
_CudaGemmFlopsBenchmark__precision_need_to_run
=
[
'
fp
32'
,
'
tf
32_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc
'
]
# Check results and metrics.
# Check results and metrics.
raw_output_
FP
32
=
"""
raw_output_
fp
32
=
"""
CSV Results:
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
@@ -72,7 +72,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
...
@@ -72,7 +72,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tn_align1,passed,success,universal,16384,16384,16384,f32:row,f32:column,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,482.034,6.22363,18249
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tn_align1,passed,success,universal,16384,16384,16384,f32:row,f32:column,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,482.034,6.22363,18249
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tt_align1,passed,success,universal,16384,16384,16384,f32:row,f32:row,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,481.838,6.22616,18256.4
1,CUTLASS,gemm,cutlass_simt_sgemm_128x128_8x2_tt_align1,passed,success,universal,16384,16384,16384,f32:row,f32:row,f32:column,1,0,1,1,simt,f32,128,128,8,2,4,2,1,1,1,1,50,1024,3221225472,8796629893120,481.838,6.22616,18256.4
"""
"""
raw_output_
TF
32_
TC
=
"""
raw_output_
tf
32_
tc
=
"""
CSV Results:
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
@@ -81,7 +81,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
...
@@ -81,7 +81,7 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tn_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:column,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,86.5167,34.6754,101676
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tn_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:column,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,86.5167,34.6754,101676
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tt_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:row,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,68.3621,43.884,128677
1,CUTLASS,gemm,cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_tt_align4,passed,success,universal,16384,16384,16384,tf32:row,tf32:row,tf32:column,1,0,1,1,tensorop,f32,256,128,16,3,4,2,1,16,8,8,80,1024,3221225472,8796629893120,68.3621,43.884,128677
"""
"""
raw_output_
FP
16_
TC
=
"""
raw_output_
fp
16_
tc
=
"""
CSV Results:
CSV Results:
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,C,alpha,beta,split_k_slices,batch_count,op_class,accum,cta_m,cta_n,cta_k,stages,warps_m,warps_n,warps_k,inst_m,inst_n,inst_k,min_cc,max_cc,Bytes,Flops,Runtime,GB/s,GFLOPs
...
@@ -90,13 +90,13 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
...
@@ -90,13 +90,13 @@ Problem,Provider,OperationKind,Operation,Disposition,Status,gemm_kind,m,n,k,A,B,
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tn_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:column,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,39.0413,38.4209,225316
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tn_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:column,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,39.0413,38.4209,225316
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tt_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:row,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,31.2994,47.9243,281048
1,CUTLASS,gemm,cutlass_tensorop_h16816gemm_256x128_32x3_tt_align8,incorrect,success,universal,16384,16384,16384,f16:row,f16:row,f16:column,1,0,1,1,tensorop,f16,256,128,32,3,4,2,1,16,8,16,80,1024,1610612736,8796629893120,31.2994,47.9243,281048
"""
"""
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output_
FP
32
))
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output_
fp
32
))
assert
(
benchmark
.
_process_raw_result
(
1
,
raw_output_
TF
32_
TC
))
assert
(
benchmark
.
_process_raw_result
(
1
,
raw_output_
tf
32_
tc
))
assert
(
benchmark
.
_process_raw_result
(
2
,
raw_output_
FP
16_
TC
))
assert
(
benchmark
.
_process_raw_result
(
2
,
raw_output_
fp
16_
tc
))
assert
(
benchmark
.
result
[
'
FP32
'
][
0
]
==
18369.7
)
assert
(
benchmark
.
result
[
'
fp32_flops
'
][
0
]
==
18369.7
)
assert
(
benchmark
.
result
[
'
TF
32_
TC
'
][
0
]
==
128677
)
assert
(
benchmark
.
result
[
'
tf
32_
tc_flops
'
][
0
]
==
128677
)
assert
(
benchmark
.
result
[
'
FP
16_
TC
'
][
0
]
==
281048
)
assert
(
benchmark
.
result
[
'
fp
16_
tc_flops
'
][
0
]
==
281048
)
# Negative case - Add invalid raw output.
# Negative case - Add invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
3
,
'Invalid raw output'
)
is
False
)
assert
(
benchmark
.
_process_raw_result
(
3
,
'Invalid raw output'
)
is
False
)
tests/benchmarks/micro_benchmarks/test_cuda_memory_bw_performance.py
View file @
9f56b219
...
@@ -328,7 +328,7 @@ bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 byt
...
@@ -328,7 +328,7 @@ bandwidthTest-D2D, Bandwidth = 772.0 GB/s, Time = 0.00008 s, Size = 64000000 byt
bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1
bandwidthTest-D2D, Bandwidth = 762.8 GB/s, Time = 0.00009 s, Size = 68000000 bytes, NumDevsUsed = 1
Result = PASS
Result = PASS
"""
"""
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
...
tests/benchmarks/micro_benchmarks/test_disk_performance.py
View file @
9f56b219
...
@@ -519,19 +519,19 @@ class DiskBenchmarkTest(unittest.TestCase):
...
@@ -519,19 +519,19 @@ class DiskBenchmarkTest(unittest.TestCase):
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
]))
assert
(
85066.128925
==
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
][
0
])
assert
(
85066.128925
==
benchmark
.
result
[
jobname_prefix
+
'_write_iops'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0'
]))
assert
(
1941504
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0
00000
'
][
0
])
assert
(
1941504
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_95.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0'
]))
assert
(
2244608
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0
00000
'
][
0
])
assert
(
2244608
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9'
]))
assert
(
3620864
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9
00000
'
][
0
])
assert
(
3620864
==
benchmark
.
result
[
jobname_prefix
+
'_read_lat_ns_99.9'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0'
]))
assert
(
1908736
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0
00000
'
][
0
])
assert
(
1908736
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_95.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0'
]))
assert
(
2072576
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0
00000
'
][
0
])
assert
(
2072576
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.0'
][
0
])
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9
00000
'
]))
assert
(
1
==
len
(
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9'
]))
assert
(
2605056
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9
00000
'
][
0
])
assert
(
2605056
==
benchmark
.
result
[
jobname_prefix
+
'_write_lat_ns_99.9'
][
0
])
# Negative case - invalid raw output.
# Negative case - invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
...
...
tests/benchmarks/micro_benchmarks/test_gemm_flops_performance_base.py
View file @
9f56b219
...
@@ -72,7 +72,7 @@ class FakeGemmFlopsBenchmark(GemmFlopsBenchmark):
...
@@ -72,7 +72,7 @@ class FakeGemmFlopsBenchmark(GemmFlopsBenchmark):
return
True
return
True
def
test_
m
em
ory_bw
_performance_base
():
def
test_
g
em
m_flops
_performance_base
():
"""Test GemmFlopsBenchmark."""
"""Test GemmFlopsBenchmark."""
# Positive case - memory=pinned.
# Positive case - memory=pinned.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
)
...
@@ -81,49 +81,49 @@ def test_memory_bw_performance_base():
...
@@ -81,49 +81,49 @@ def test_memory_bw_performance_base():
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
# Check command list
# Check command list
expected_command
=
[
expected_command
=
[
'echo "--precision
FP
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
64_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
64_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
TF
32_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
tf
32_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
BF
16_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
bf
16_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16_
TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
16_
tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
INT8_TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
int8_tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
INT4_TC
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision
int4_tc
--m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
]
for
i
in
range
(
len
(
expected_command
)):
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
(
for
i
,
metric
in
enumerate
(
[
'
FP
64'
,
'
FP
32'
,
'
FP
16'
,
'
FP
64_
TC
'
,
'
TF
32_
TC
'
,
'
BF
16_
TC
'
,
'
FP
16_
TC
'
,
'
INT8_TC'
,
'INT4_TC
'
]
[
'
fp
64'
,
'
fp
32'
,
'
fp
16'
,
'
fp
64_
tc
'
,
'
tf
32_
tc
'
,
'
bf
16_
tc
'
,
'
fp
16_
tc
'
,
'
int8_tc'
,
'int4_tc
'
]
):
):
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
# Positive case - memory=unpinned.
# Positive case - memory=unpinned.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
FP
64
FP
32
FP
16'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
fp
64
fp
32
fp
16'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
())
assert
(
benchmark
.
run
())
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
# Check command list
# Check command list
expected_command
=
[
expected_command
=
[
'echo "--precision
FP
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
64 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
fp
32 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
,
'echo "--precision
FP
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
'echo "--precision
fp
16 --m 16384 --n 16384 --k 16384 --num_warmup 5"'
]
]
for
i
in
range
(
len
(
expected_command
)):
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
FP
64'
,
'
FP
32'
,
'
FP
16'
]):
for
i
,
metric
in
enumerate
([
'
fp
64'
,
'
fp
32'
,
'
fp
16'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
FP
64
BF
64'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
fp
64
bf
64'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
()
is
True
)
assert
(
benchmark
.
run
()
is
True
)
# Negative case - INVALID_ARGUMENT.
# Negative case - INVALID_ARGUMENT.
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
BF
64'
)
benchmark
=
FakeGemmFlopsBenchmark
(
'fake'
,
parameters
=
'--precision
bf
64'
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
_benchmark_type
==
BenchmarkType
.
MICRO
)
assert
(
benchmark
.
run
()
is
False
)
assert
(
benchmark
.
run
()
is
False
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
NO_SUPPORTED_PRECISION
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
NO_SUPPORTED_PRECISION
)
tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
View file @
9f56b219
...
@@ -98,14 +98,19 @@ the job to be terminated. The first process to do so was:
...
@@ -98,14 +98,19 @@ the job to be terminated. The first process to do so was:
# Check function process_raw_data.
# Check function process_raw_data.
# Positive case - valid raw output.
# Positive case - valid raw output.
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
test_name
=
'IsolatedNetworkTests'
metric_list
=
[
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRGetLat(8B)'
,
'RRTwo-sidedBW(131072B)'
,
'RRPutBW(131072B)'
,
'rr_two-sided_lat'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'NatTwo-sidedBW(131072B)'
,
'MultipleAllreduce(8B)'
,
'MultipleAlltoall(4096B)'
'rr_get_lat'
,
'rr_two-sided_bw'
,
'rr_put_bw'
,
'rr_two-sided+sync_bw'
,
'nat_two-sided_bw'
,
'multiple_allreduce_time'
,
'multiple_alltoall_bw'
,
]
]
for
metric_medium
in
metric_list
:
for
metric_medium
in
metric_list
:
for
suffix
in
[
'
A
vg'
,
'99%'
]:
for
suffix
in
[
'
a
vg'
,
'99%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
metric
=
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
...
@@ -253,58 +258,10 @@ the job to be terminated. The first process to do so was:
...
@@ -253,58 +258,10 @@ the job to be terminated. The first process to do so was:
assert
(
len
(
benchmark
.
result
)
==
benchmark
.
default_metric_count
)
assert
(
len
(
benchmark
.
result
)
==
benchmark
.
default_metric_count
)
# Positive case - valid raw output.
# Positive case - valid raw output.
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
assert
(
benchmark
.
_process_raw_result
(
0
,
raw_output
))
test_name
=
'IsolatedNetworkTests'
metric_list
=
[
'rr_two-sided_lat_x'
,
'rr_two-sided+sync_bw_x'
,
'multiple_allreduce_x'
]
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'MultipleAllreduce(8B)'
]
for
metric_medium
in
metric_list
:
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
for
suffix
in
[
'avg'
,
'99%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
metric
=
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'IsolatedCongestionTests'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(RRTwo-sidedLatNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'RRTwo-sidedLat(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(RRTwo-sidedBW+SyncNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'RRTwo-sidedBW+Sync(131072B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests(MultipleAllreduceNetworkTest)'
metric_list
=
[
'GetBcast(4096B)'
,
'PutIncast(4096B)'
,
'Two-sidedIncast(4096B)'
,
'Alltoall(4096B)'
,
'MultipleAllreduce(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Max'
,
'Min'
,
'Avg'
,
'99.9%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
test_name
=
'NetworkTestsrunningwithCongestionTests-KeyResults'
metric_list
=
[
'RRTwo-sidedLat(8B)'
,
'RRTwo-sidedBW+Sync(131072B)'
,
'MultipleAllreduce(8B)'
]
for
metric_medium
in
metric_list
:
for
suffix
in
[
'Avg'
,
'99%'
]:
metric
=
test_name
+
'_'
+
metric_medium
+
'_'
+
suffix
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
...
...
tests/benchmarks/micro_benchmarks/test_gpu_copy_bw_performance.py
View file @
9f56b219
...
@@ -119,8 +119,8 @@ gpu0_to_gpu0_by_gpu0_using_dma_under_numa1 634.203
...
@@ -119,8 +119,8 @@ gpu0_to_gpu0_by_gpu0_using_dma_under_numa1 634.203
else
:
else
:
assert
(
len
(
benchmark
.
result
[
output_key
])
==
1
)
assert
(
len
(
benchmark
.
result
[
output_key
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
output_key
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
output_key
][
0
],
numbers
.
Number
))
assert
(
output_key
in
test_raw_output_dict
)
assert
(
output_key
.
strip
(
'_bw'
)
in
test_raw_output_dict
)
assert
(
test_raw_output_dict
[
output_key
]
==
benchmark
.
result
[
output_key
][
0
])
assert
(
test_raw_output_dict
[
output_key
.
strip
(
'_bw'
)
]
==
benchmark
.
result
[
output_key
][
0
])
# Negative case - invalid raw output.
# Negative case - invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
assert
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
)
is
False
)
...
...
tests/benchmarks/micro_benchmarks/test_ib_loopback_performance.py
View file @
9f56b219
...
@@ -158,7 +158,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
...
@@ -158,7 +158,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
metric_list
=
[]
metric_list
=
[]
for
ib_command
in
benchmark
.
_args
.
commands
:
for
ib_command
in
benchmark
.
_args
.
commands
:
for
size
in
[
'8388608'
,
'4194304'
,
'1024'
,
'2'
]:
for
size
in
[
'8388608'
,
'4194304'
,
'1024'
,
'2'
]:
metric
=
'
IB
_{}_{}_
Avg_{}
'
.
format
(
ib_command
,
size
,
str
(
benchmark
.
_args
.
ib_index
))
metric
=
'
ib
_{}_{}_
ib{}_bw
'
.
format
(
ib_command
,
size
,
str
(
benchmark
.
_args
.
ib_index
))
metric_list
.
append
(
metric
)
metric_list
.
append
(
metric
)
for
metric
in
metric_list
:
for
metric
in
metric_list
:
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
...
@@ -270,7 +270,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
...
@@ -270,7 +270,7 @@ remote address: LID 0xd06 QPN 0x092f PSN 0x3ff1bc RKey 0x080329 VAddr 0x007fc97f
# Positive case - valid raw output.
# Positive case - valid raw output.
metric_list
=
[]
metric_list
=
[]
for
ib_command
in
benchmark
.
_args
.
commands
:
for
ib_command
in
benchmark
.
_args
.
commands
:
metric
=
'
IB
_{}_8388608_
Avg_{}
'
.
format
(
ib_command
,
str
(
benchmark
.
_args
.
ib_index
))
metric
=
'
ib
_{}_8388608_
ib{}_bw
'
.
format
(
ib_command
,
str
(
benchmark
.
_args
.
ib_index
))
metric_list
.
append
(
metric
)
metric_list
.
append
(
metric
)
for
metric
in
metric_list
:
for
metric
in
metric_list
:
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
...
...
tests/benchmarks/micro_benchmarks/test_ib_traffic_performance.py
View file @
9f56b219
...
@@ -27,6 +27,9 @@ class IBBenchmarkTest(unittest.TestCase):
...
@@ -27,6 +27,9 @@ class IBBenchmarkTest(unittest.TestCase):
def
tearDown
(
self
):
def
tearDown
(
self
):
"""Method called after the test method has been called and the result recorded."""
"""Method called after the test method has been called and the result recorded."""
self
.
__binary_file
.
unlink
()
self
.
__binary_file
.
unlink
()
p
=
Path
(
'hostfile'
)
if
p
.
is_file
():
p
.
unlink
()
def
test_generate_config
(
self
):
# noqa: C901
def
test_generate_config
(
self
):
# noqa: C901
"""Test util functions ."""
"""Test util functions ."""
...
@@ -126,15 +129,18 @@ class IBBenchmarkTest(unittest.TestCase):
...
@@ -126,15 +129,18 @@ class IBBenchmarkTest(unittest.TestCase):
# Check preprocess
# Check preprocess
# Negative cases
# Negative cases
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
None
mock_ib_devices
.
return_value
=
None
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
assert
(
ret
is
False
)
assert
(
ret
is
False
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
MICROBENCHMARK_MPI_INIT_FAILURE
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
MICROBENCHMARK_MPI_INIT_FAILURE
)
hosts
=
[
'node0
\n
'
,
'node1
\n
'
,
'node2
\n
'
,
'node3
\n
'
]
with
open
(
'hostfile'
,
'w'
)
as
f
:
f
.
writelines
(
hosts
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
None
mock_ib_devices
.
return_value
=
None
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
...
@@ -143,21 +149,21 @@ class IBBenchmarkTest(unittest.TestCase):
...
@@ -143,21 +149,21 @@ class IBBenchmarkTest(unittest.TestCase):
# Positive cases
# Positive cases
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'3'
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'3'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one'
parameters
=
'--ib_index 0 --iters 2000 --pattern one-to-one
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
assert
(
ret
is
True
)
assert
(
ret
is
True
)
# Generate config
# Generate config
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432'
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'4'
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
Path
(
'config.txt'
).
unlink
()
Path
(
'config.txt'
).
unlink
()
assert
(
ret
)
assert
(
ret
)
expect_command
=
'ib_validation --hostfile
/root/
hostfile --cmd_prefix "ib_write_bw -F '
+
\
expect_command
=
'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F '
+
\
'--iters=2000 -d mlx5_0 -s 33554432" --input_config '
+
os
.
getcwd
()
+
'/config.txt'
'--iters=2000 -d mlx5_0 -s 33554432" --input_config '
+
os
.
getcwd
()
+
'/config.txt'
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expect_command
)
assert
(
command
==
expect_command
)
...
@@ -167,14 +173,14 @@ class IBBenchmarkTest(unittest.TestCase):
...
@@ -167,14 +173,14 @@ class IBBenchmarkTest(unittest.TestCase):
with
open
(
'test_config.txt'
,
'w'
)
as
f
:
with
open
(
'test_config.txt'
,
'w'
)
as
f
:
for
line
in
config
:
for
line
in
config
:
f
.
write
(
line
+
'
\n
'
)
f
.
write
(
line
+
'
\n
'
)
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt'
parameters
=
'--ib_index 0 --iters 2000 --msg_size 33554432 --config test_config.txt
--hostfile hostfile
'
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
benchmark
=
benchmark_class
(
benchmark_name
,
parameters
=
parameters
)
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'2'
os
.
environ
[
'OMPI_COMM_WORLD_SIZE'
]
=
'2'
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
mock_ib_devices
.
return_value
=
[
'mlx5_0'
]
ret
=
benchmark
.
_preprocess
()
ret
=
benchmark
.
_preprocess
()
Path
(
'test_config.txt'
).
unlink
()
Path
(
'test_config.txt'
).
unlink
()
assert
(
ret
)
assert
(
ret
)
expect_command
=
'ib_validation --hostfile
/root/
hostfile --cmd_prefix "ib_write_bw -F '
+
\
expect_command
=
'ib_validation --hostfile hostfile --cmd_prefix "ib_write_bw -F '
+
\
'--iters=2000 -d mlx5_0 -s 33554432" --input_config test_config.txt'
'--iters=2000 -d mlx5_0 -s 33554432" --input_config test_config.txt'
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
0
].
split
(
benchmark
.
_bin_name
)[
1
]
...
...
tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
View file @
9f56b219
...
@@ -36,7 +36,7 @@ def test_kernel_launch_overhead():
...
@@ -36,7 +36,7 @@ def test_kernel_launch_overhead():
assert
(
'raw_output_0'
in
benchmark
.
raw_data
)
assert
(
'raw_output_0'
in
benchmark
.
raw_data
)
assert
(
len
(
benchmark
.
raw_data
[
'raw_output_0'
])
==
1
)
assert
(
len
(
benchmark
.
raw_data
[
'raw_output_0'
])
==
1
)
assert
(
isinstance
(
benchmark
.
raw_data
[
'raw_output_0'
][
0
],
str
))
assert
(
isinstance
(
benchmark
.
raw_data
[
'raw_output_0'
][
0
],
str
))
for
metric
in
[
'event_
overhead
'
,
'wall_
overhead
'
]:
for
metric
in
[
'event_
time
'
,
'wall_
time
'
]:
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
tests/benchmarks/micro_benchmarks/test_matmul.py
View file @
9f56b219
...
@@ -35,6 +35,6 @@ def test_pytorch_matmul():
...
@@ -35,6 +35,6 @@ def test_pytorch_matmul():
# Check results and metrics.
# Check results and metrics.
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding
_time
'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding'
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
raw_data
[
'nosharding
_time
'
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
result
[
'nosharding'
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
result
[
'nosharding
_time
'
])
==
benchmark
.
run_count
)
tests/benchmarks/micro_benchmarks/test_memory_bw_performance_base.py
View file @
9f56b219
...
@@ -83,7 +83,7 @@ def test_memory_bw_performance_base():
...
@@ -83,7 +83,7 @@ def test_memory_bw_performance_base():
for
i
in
range
(
len
(
expected_command
)):
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
@@ -97,7 +97,7 @@ def test_memory_bw_performance_base():
...
@@ -97,7 +97,7 @@ def test_memory_bw_performance_base():
for
i
in
range
(
len
(
expected_command
)):
for
i
in
range
(
len
(
expected_command
)):
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
command
=
benchmark
.
_bin_name
+
benchmark
.
_commands
[
i
].
split
(
benchmark
.
_bin_name
)[
1
]
assert
(
command
==
expected_command
[
i
])
assert
(
command
==
expected_command
[
i
])
for
i
,
metric
in
enumerate
([
'
H2D_Mem_BW'
,
'D2H_Mem_BW'
,
'D2D_Mem_BW
'
]):
for
i
,
metric
in
enumerate
([
'
h2d_bw'
,
'd2h_bw'
,
'd2d_bw
'
]):
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
...
...
tests/benchmarks/micro_benchmarks/test_rocm_gemm_flops_performance.py
View file @
9f56b219
...
@@ -92,11 +92,11 @@ T,N,7680,8192,8192,1,8416,0,8416,8416,8416,1, 162675, 6336.5
...
@@ -92,11 +92,11 @@ T,N,7680,8192,8192,1,8416,0,8416,8416,8416,1, 162675, 6336.5
assert
(
benchmark
.
_process_raw_result
(
3
,
raw_output_BF16_X
))
assert
(
benchmark
.
_process_raw_result
(
3
,
raw_output_BF16_X
))
assert
(
benchmark
.
_process_raw_result
(
4
,
raw_output_INT8_X
))
assert
(
benchmark
.
_process_raw_result
(
4
,
raw_output_INT8_X
))
assert
(
benchmark
.
result
[
'
FP64
'
][
0
]
==
10037.5
)
assert
(
benchmark
.
result
[
'
fp64_flops
'
][
0
]
==
10037.5
)
assert
(
benchmark
.
result
[
'
FP
32_x
DLOPS
'
][
0
]
==
39441.6
)
assert
(
benchmark
.
result
[
'
fp
32_x
dlops_flops
'
][
0
]
==
39441.6
)
assert
(
benchmark
.
result
[
'
FP
16_x
DLOPS
'
][
0
]
==
153728
)
assert
(
benchmark
.
result
[
'
fp
16_x
dlops_flops
'
][
0
]
==
153728
)
assert
(
benchmark
.
result
[
'
BF
16_x
DLOPS
'
][
0
]
==
81374.3
)
assert
(
benchmark
.
result
[
'
bf
16_x
dlops_flops
'
][
0
]
==
81374.3
)
assert
(
benchmark
.
result
[
'
INT8_xDLOPS
'
][
0
]
==
162675
)
assert
(
benchmark
.
result
[
'
int8_xdlops_iops
'
][
0
]
==
162675
)
# Negative case - Add invalid raw output.
# Negative case - Add invalid raw output.
assert
(
benchmark
.
_process_raw_result
(
4
,
'Invalid raw output'
)
is
False
)
assert
(
benchmark
.
_process_raw_result
(
4
,
'Invalid raw output'
)
is
False
)
tests/benchmarks/micro_benchmarks/test_rocm_memory_bw_performance.py
View file @
9f56b219
...
@@ -159,11 +159,11 @@ Note: results marked with (*) had missing values such as
...
@@ -159,11 +159,11 @@ Note: results marked with (*) had missing values such as
might occur with a mixture of architectural capabilities.
might occur with a mixture of architectural capabilities.
"""
"""
for
i
,
metric
in
enumerate
([
'h
tod_524288kB'
,
'htod_524288kB
'
]):
for
i
,
metric
in
enumerate
([
'h
2d_bw'
,
'd2h_bw
'
]):
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
benchmark
.
_process_raw_result
(
i
,
raw_output
[
i
]))
assert
(
metric
in
benchmark
.
result
)
assert
(
metric
in
benchmark
.
result
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
1
)
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
metric
][
0
],
numbers
.
Number
))
assert
(
benchmark
.
result
[
'h
tod_524288kB
'
][
0
]
==
2
4.6708
)
assert
(
benchmark
.
result
[
'h
2d_bw
'
][
0
]
==
2
5.2351
)
assert
(
benchmark
.
result
[
'd
toh_524288kB
'
][
0
]
==
27.9348
)
assert
(
benchmark
.
result
[
'd
2h_bw
'
][
0
]
==
27.9348
)
tests/benchmarks/micro_benchmarks/test_sharding_matmul.py
View file @
9f56b219
...
@@ -44,7 +44,7 @@ def test_pytorch_sharding_matmul():
...
@@ -44,7 +44,7 @@ def test_pytorch_sharding_matmul():
# Check results and metrics.
# Check results and metrics.
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
run_count
==
2
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
for
metric
in
[
'allreduce'
,
'allgather'
]:
for
metric
in
[
'allreduce
_time
'
,
'allgather
_time
'
]:
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
benchmark
.
_args
.
num_steps
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
benchmark
.
run_count
)
assert
(
len
(
benchmark
.
result
[
metric
])
==
benchmark
.
run_count
)
...
...
tests/benchmarks/micro_benchmarks/test_tcp_connectivity.py
View file @
9f56b219
...
@@ -52,15 +52,15 @@ class TCPConnectivityBenchmarkTest(unittest.TestCase):
...
@@ -52,15 +52,15 @@ class TCPConnectivityBenchmarkTest(unittest.TestCase):
assert
(
benchmark
.
result
)
assert
(
benchmark
.
result
)
# Check results and metrics.
# Check results and metrics.
assert
(
benchmark
.
result
[
'
Successed_
api.github.com'
][
0
]
==
10
)
assert
(
benchmark
.
result
[
'api.github.com
_successed_count
'
][
0
]
==
10
)
assert
(
benchmark
.
result
[
'
Failed_
api.github.com'
][
0
]
==
0
)
assert
(
benchmark
.
result
[
'api.github.com
_failed_count
'
][
0
]
==
0
)
assert
(
benchmark
.
result
[
'
Success_Rate_
api.github.com'
][
0
]
==
100.0
)
assert
(
benchmark
.
result
[
'api.github.com
_success_rate
'
][
0
]
==
100.0
)
assert
(
isinstance
(
benchmark
.
result
[
'
Minimum_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_min
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Maximum_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_max
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Average_
api.github.com'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'api.github.com
_time_avg
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
S
uccessed_
localhos
t'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
localhost_s
uccessed_
coun
t'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Failed_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_failed_count
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Maximum_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_max
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Minimum_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_min
'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'
Average_
localhost'
][
0
],
numbers
.
Number
))
assert
(
isinstance
(
benchmark
.
result
[
'localhost
_time_avg
'
][
0
],
numbers
.
Number
))
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
tests/benchmarks/micro_benchmarks/test_tensorrt_inference_performance.py
View file @
9f56b219
...
@@ -135,9 +135,9 @@ class TensorRTInferenceBenchmarkTestCase(unittest.TestCase):
...
@@ -135,9 +135,9 @@ class TensorRTInferenceBenchmarkTestCase(unittest.TestCase):
self
.
assertEqual
(
6
+
benchmark
.
default_metric_count
,
len
(
benchmark
.
result
))
self
.
assertEqual
(
6
+
benchmark
.
default_metric_count
,
len
(
benchmark
.
result
))
for
tag
in
[
'mean'
,
'99'
]:
for
tag
in
[
'mean'
,
'99'
]:
self
.
assertEqual
(
0.5
,
benchmark
.
result
[
f
'
gpu_lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.5
,
benchmark
.
result
[
f
'
model_0_gpu_time
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.6
,
benchmark
.
result
[
f
'
host_lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
0.6
,
benchmark
.
result
[
f
'
model_0_host_time
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
1.0
,
benchmark
.
result
[
f
'end_to_end_
lat_ms
_
{
tag
}
'
][
0
])
self
.
assertEqual
(
1.0
,
benchmark
.
result
[
f
'
model_0_
end_to_end_
time
_
{
tag
}
'
][
0
])
# Negative case - invalid raw output
# Negative case - invalid raw output
self
.
assertFalse
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
))
self
.
assertFalse
(
benchmark
.
_process_raw_result
(
1
,
'Invalid raw output'
))
tests/benchmarks/model_benchmarks/test_model_base.py
View file @
9f56b219
...
@@ -223,10 +223,10 @@ def test_train():
...
@@ -223,10 +223,10 @@ def test_train():
expected_result
=
(
expected_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"start_time": null, "end_time": null, "raw_data": {'
'"
steptime_train_float32
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
fp32_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float32
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"
fp32_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
steptime_train_float32": [2.0], "throughput_train_float32
": [16000.0]}, '
'"result": {"return_code": [0], "
fp32_train_step_time": [2.0], "fp32_train_throughput
": [16000.0]}, '
'"reduce_op": {"
steptime_train_float32": "max", "throughput_train_float32
": "min"}}'
'"reduce_op": {"
fp32_train_step_time": "max", "fp32_train_throughput
": "min"}}'
)
)
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_ModelBenchmark__train
(
Precision
.
FLOAT32
))
assert
(
benchmark
.
_ModelBenchmark__train
(
Precision
.
FLOAT32
))
...
@@ -249,10 +249,11 @@ def test_inference():
...
@@ -249,10 +249,11 @@ def test_inference():
expected_result
=
(
expected_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, '
'"start_time": null, "end_time": null, "raw_data": {'
'"start_time": null, "end_time": null, "raw_data": {'
'"steptime_inference_float16": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"fp16_inference_step_time": [[4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]], '
'"throughput_inference_float16": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"fp16_inference_throughput": [[8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0]]}, '
'"result": {"return_code": [0], "steptime_inference_float16": [4.0], "throughput_inference_float16": '
'"result": {"return_code": [0], '
'[8000.0]}, "reduce_op": {"steptime_inference_float16": null, "throughput_inference_float16": null}}'
'"fp16_inference_step_time": [4.0], "fp16_inference_throughput": [8000.0]}, '
'"reduce_op": {"fp16_inference_step_time": null, "fp16_inference_throughput": null}}'
)
)
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_preprocess
())
assert
(
benchmark
.
_ModelBenchmark__inference
(
Precision
.
FLOAT16
))
assert
(
benchmark
.
_ModelBenchmark__inference
(
Precision
.
FLOAT16
))
...
@@ -280,31 +281,31 @@ def test_benchmark():
...
@@ -280,31 +281,31 @@ def test_benchmark():
assert
(
benchmark
.
run_count
==
1
)
assert
(
benchmark
.
run_count
==
1
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
SUCCESS
)
expected_raw_data
=
{
expected_raw_data
=
{
'
steptime_train_float32
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
fp32_train_step_time
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
throughput_train_float32
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]],
'
fp32_train_throughput
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]],
'
steptime_train_float16
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
fp16_train_step_time
'
:
[[
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
,
2.0
]],
'
throughput_train_float16
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]]
'
fp16_train_throughput
'
:
[[
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
,
16000.0
]]
}
}
assert
(
benchmark
.
raw_data
==
expected_raw_data
)
assert
(
benchmark
.
raw_data
==
expected_raw_data
)
expected_result
=
{
expected_result
=
{
'return_code'
:
[
0
],
'return_code'
:
[
0
],
'
steptime_train_float32
'
:
[
2.0
],
'
fp32_train_step_time
'
:
[
2.0
],
'
throughput_train_float32
'
:
[
16000.0
],
'
fp32_train_throughput
'
:
[
16000.0
],
'
steptime_train_float16
'
:
[
2.0
],
'
fp16_train_step_time
'
:
[
2.0
],
'
throughput_train_float16
'
:
[
16000.0
]
'
fp16_train_throughput
'
:
[
16000.0
]
}
}
assert
(
benchmark
.
result
==
expected_result
)
assert
(
benchmark
.
result
==
expected_result
)
expected_serialized_result
=
(
expected_serialized_result
=
(
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'{"name": "pytorch-fake-model", "type": "model", "run_count": 1, "return_code": 0, "start_time": null, '
'"end_time": null, "raw_data": {"
steptime_train_float32
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"end_time": null, "raw_data": {"
fp32_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float32
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"
fp32_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]], '
'"
steptime_train_float16
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
fp16_train_step_time
": [[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]], '
'"
throughput_train_float16
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"
fp16_train_throughput
": [[16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0, 16000.0]]}, '
'"result": {"return_code": [0], "
steptime_train_float32": [2.0], "throughput_train_float32
": [16000.0], '
'"result": {"return_code": [0], "
fp32_train_step_time": [2.0], "fp32_train_throughput
": [16000.0], '
'"
steptime_train_float16": [2.0], "throughput_train_float16
": [16000.0]}, '
'"
fp16_train_step_time": [2.0], "fp16_train_throughput
": [16000.0]}, '
'"reduce_op": {"
steptime_train_float32": "max", "throughput_train_float32
": "min", '
'"reduce_op": {"
fp32_train_step_time": "max", "fp32_train_throughput
": "min", '
'"
steptime_train_float16": "max", "throughput_train_float16
": "min"}}'
'"
fp16_train_step_time": "max", "fp16_train_throughput
": "min"}}'
)
)
assert
(
benchmark
.
serialized_result
==
expected_serialized_result
)
assert
(
benchmark
.
serialized_result
==
expected_serialized_result
)
...
...
tests/benchmarks/model_benchmarks/test_pytorch_base.py
View file @
9f56b219
...
@@ -188,8 +188,7 @@ def test_pytorch_base():
...
@@ -188,8 +188,7 @@ def test_pytorch_base():
# Test results.
# Test results.
for
metric
in
[
for
metric
in
[
'steptime_train_float32'
,
'steptime_inference_float32'
,
'throughput_train_float32'
,
'fp32_train_step_time'
,
'fp32_inference_step_time'
,
'fp32_train_throughput'
,
'fp32_inference_throughput'
'throughput_inference_float32'
]:
]:
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
raw_data
[
metric
])
==
1
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
64
)
assert
(
len
(
benchmark
.
raw_data
[
metric
][
0
])
==
64
)
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment