gaoqiong / composable_kernel_ROCM · Commits · 72c9f129

Commit 72c9f129, authored Sep 20, 2024 by Jun Liu
Merge branch 'amd-develop' into amd-master
Parents: 241c261f, ded0d83d

Changes: 235 changed files in this commit. This page shows 15 changed files, with 337 additions and 128 deletions (+337, -128).
Files shown on this page:

  script/process_perf_data.py                                           +15  -3
  script/process_qa_data.sh                                              +3  -2
  script/profile_grouped_conv_bwd_data.sh                                +0  -0
  script/profile_grouped_conv_bwd_weight.sh                             +22  -21
  script/run_full_performance_tests.sh                                  +20  -14
  script/profile_grouped_conv_fwd.sh                                    +39  -0
  script/run_performance_tests.sh                                       +15  -0
  test/data_type/CMakeLists.txt                                          +4  -0
  test/data_type/test_bf8.cpp                                           +23  -19
  test/data_type/test_fp8.cpp                                           +23  -19
  test/gemm_universal/test_gemm_universal_xdl.cpp                        +7  -2
  test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp     +10  -2
  test/grouped_convnd_fwd/CMakeLists.txt                                 +6  -0
  test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp                   +23  -46
  test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp  +127  -0
script/process_perf_data.py  (+15, -3)

@@ -122,7 +122,7 @@ def parse_logfile(logfile):
         #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
         test_list = list(range(1, len(tests) + 1))
     #parse conv_fwd and conv_bwd performance tests:
-    elif 'conv_fwd' in logfile or 'conv_bwd_data' in logfile:
+    elif 'conv_fwd' in logfile or 'conv_bwd' in logfile:
         for line in open(logfile):
             if 'tflops:' in line:
                 lst = line.split()

@@ -274,14 +274,26 @@ def main():
         for i in range(1, len(results) + 1):
             testlist.append("Test%i" % i)
         table_name = "ck_grouped_gemm_tflops"
-    if 'conv_fwd' in filename:
+    if 'perf_conv_fwd' in filename:
         for i in range(1, len(results) + 1):
             testlist.append("Test%i" % i)
         table_name = "ck_conv_fwd_tflops"
-    if 'conv_bwd_data' in filename:
+    if 'perf_conv_bwd_data' in filename:
         for i in range(1, len(results) + 1):
             testlist.append("Test%i" % i)
         table_name = "ck_conv_bwd_data_tflops"
+    if 'grouped_conv_fwd' in filename:
+        for i in range(1, len(results) + 1):
+            testlist.append("Test%i" % i)
+        table_name = "ck_grouped_conv_fwd_tflops"
+    if 'grouped_conv_bwd_data' in filename:
+        for i in range(1, len(results) + 1):
+            testlist.append("Test%i" % i)
+        table_name = "ck_grouped_conv_bwd_data_tflops"
+    if 'grouped_conv_bwd_weight' in filename:
+        for i in range(1, len(results) + 1):
+            testlist.append("Test%i" % i)
+        table_name = "ck_grouped_conv_bwd_weight_tflops"
     if 'gemm_bilinear' in filename:
         for i in range(1, len(results) + 1):
             testlist.append("Test%i" % i)
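For orientation, here is a condensed sketch (not part of the commit, and pick_table is a hypothetical helper, not a function in the script) of the table-name dispatch above. The point of the new 'perf_' prefixes is that the bare substring 'conv_fwd' also matches the new perf_grouped_conv_fwd.log, so the plain-conv branch would fire on grouped logs too; since every branch in main() also appends to testlist, a double match would duplicate the test labels as well.

    # Hypothetical, condensed version of the dispatch in main().
    def pick_table(filename):
        table_name = None
        if 'perf_conv_fwd' in filename:        # was: 'conv_fwd', which would also match
            table_name = "ck_conv_fwd_tflops"  # perf_grouped_conv_fwd.log
        if 'grouped_conv_fwd' in filename:
            table_name = "ck_grouped_conv_fwd_tflops"
        return table_name

    print(pick_table("perf_conv_fwd.log"))          # ck_conv_fwd_tflops
    print(pick_table("perf_grouped_conv_fwd.log"))  # ck_grouped_conv_fwd_tflops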

script/process_qa_data.sh  (+3, -2)

@@ -15,8 +15,9 @@ python3 process_perf_data.py perf_resnet50_N256.log
 python3 process_perf_data.py perf_resnet50_N4.log
 python3 process_perf_data.py perf_batched_gemm.log
 python3 process_perf_data.py perf_grouped_gemm.log
-python3 process_perf_data.py perf_conv_fwd.log
-python3 process_perf_data.py perf_conv_bwd_data.log
+python3 process_perf_data.py perf_grouped_conv_fwd.log
+python3 process_perf_data.py perf_grouped_conv_bwd_data.log
+python3 process_perf_data.py perf_grouped_conv_bwd_weight.log
 python3 process_perf_data.py perf_gemm_bilinear.log
 python3 process_perf_data.py perf_reduction.log
 python3 process_perf_data.py perf_splitK_gemm.log

script/profile_conv_bwd_data.sh → script/profile_grouped_conv_bwd_data.sh  (+0, -0)

File moved; no content changes.

script/profile_conv_fwd.sh → script/profile_grouped_conv_bwd_weight.sh  (+22, -21)

@@ -12,27 +12,28 @@ INIT=$5
 LOG=$6
 TIME=$7
-N=$8
+N=$8
+SplitK=$9
 
 # Resnet50
 ######## op  datatype  layout  verify  init  log  time  conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
-$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $SplitK
+$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 $SplitK

script/profile_grouped_conv_fwd.sh  (new file, mode 100755)  (+39, -0)

#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0

DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
LAYOUT=$3
INDEXTYPE=$4
VERIFY=$5
INIT=$6
LOG=$7
TIME=$8
N=$9

# Resnet50
######## op  datatype  indextype  layout  verify  init  log  time  conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
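As a minimal sketch (not part of the commit) of driving this new script from Python, the call below mirrors the fp16 invocation added to run_performance_tests.sh further down this page; the positional meanings are inferred from the variable assignments at the top of the script, and the "1 = fp16" reading comes from the "#run grouped_fwd fp16 tests" block there.

    import subprocess

    # Positional order follows OP=$1 ... N=$9 in profile_grouped_conv_fwd.sh.
    args = {
        "OP": "grouped_conv_fwd",
        "DATATYPE": "1",   # 1 = fp16, per the fp16 run in run_performance_tests.sh
        "LAYOUT": "1",
        "INDEXTYPE": "0",
        "VERIFY": "1",     # corresponds to $verify in the run scripts
        "INIT": "1",
        "LOG": "0",
        "TIME": "1",
        "N": "256",        # batch size
    }
    subprocess.run(["./profile_grouped_conv_fwd.sh", *args.values()], check=True)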

script/run_full_performance_tests.sh  (+20, -14)

@@ -90,21 +90,27 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
 ./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
 ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
 
-#run conv_fwd tests
-export conv_fwd_log="perf_conv_fwd.log"
-print_log_header $conv_fwd_log $env_type $branch $host_name
-./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
-./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
-./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
-./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
+#run grouped_fwd tests
+export grouped_conv_fwd_log="perf_grouped_conv_fwd.log"
+print_log_header $grouped_conv_fwd_log $env_type $branch $host_name
+./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
+./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
+./profile_grouped_conv_fwd.sh grouped_conv_fwd 2 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
 
-#run conv_bwd_data tests
-export conv_bwd_data_log="perf_conv_bwd_data.log"
-print_log_header $conv_bwd_data_log $env_type $branch $host_name
-./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
-./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
-./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
-./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
+#run grouped_bwd_data tests
+export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data.log"
+print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
+./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
+./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
+./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
+
+#run grouped_bwd_weight tests
+export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight.log"
+print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
+./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 0 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
+./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
+./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 2 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
+./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 4 2>&1 | tee -a $grouped_conv_bwd_weight_log
 
 #run resnet50 tests
 export resnet256_log="perf_resnet50_N256.log"

script/run_performance_tests.sh  (+15, -0)

@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name
 ./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
 ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
 
+#run grouped_fwd fp16 tests
+export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log"
+print_log_header $conv_fwd_log $env_type $branch $host_name
+./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
+
+#run grouped_bwd_data fp16 tests
+export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log"
+print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
+./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
+
+#run grouped_bwd_weight fp16 tests
+export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log"
+print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
+./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
+
 #run resnet50 tests
 export resnet256_log="perf_resnet50_N256.log"
 print_log_header $resnet256_log $env_type $branch $host_name

test/data_type/CMakeLists.txt  (+4, -0)

+# temporarily disable flaky test for all architectures
+add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
+set(CK_SKIP_FLAKY_F8_TEST "ON")
+
 if(USE_BITINT_EXTENSION_INT4)
     add_gtest_executable(test_int4 test_int4.cpp)
     if(result EQUAL 0)

test/data_type/test_bf8.cpp  (+23, -19)

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
 
 using ck::bf8_t;
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::half_t;
 using ck::type_convert;

@@ -24,33 +25,36 @@ TEST(BF8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to bf8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<bf8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<bf8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to bf8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal bf8_t to float and check if equal to 57344.0
-    ASSERT_NEAR(57344.0f, type_convert<float>(type_convert<bf8_t>(57344.0f)), abs_tol);
+    ASSERT_NEAR(57344.0f, type_convert<float>(f8_convert_rne<bf8_t>(57344.0f)), abs_tol);
     // convert maximal float to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(57344.0f,
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to bf8_t and check if it is qNan
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<bf8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to bf8 and back, check if holds
     float pos_float = 0.0000762939f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative norm float value to bf8 and back, check if holds
     float neg_float = -0.0000610351f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
     // positive subnorm float value to bf8 and back, check if holds
     pos_float = 0.0000305175f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative subnorm float value to bf8 and back, check if holds
     neg_float = -0.0000152587f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
 }
 
 TEST(BF8, ConvertFP32Stochastic)

@@ -92,34 +96,34 @@ TEST(BF8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to bf8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to bf8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal bf8_t to fp16 and check if equal to 57344.0
     ASSERT_NEAR(
-        half_t{57344.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{57344.0})), abs_tol);
+        half_t{57344.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{57344.0})), abs_tol);
     // convert maximal fp16 to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(half_t{57344.0},
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to bf8 and back, check if holds
     half_t pos_half = half_t{0.0000762939};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to bf8 and back, check if holds
     half_t neg_half = half_t{-0.0000610351};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to bf8 and back, check if holds
     pos_half = half_t{0.0000305175};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to bf8 and back, check if holds
     neg_half = half_t{-0.0000152587};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
 }
 
 TEST(BF8, ConvertFP16Stochastic)

test/data_type/test_fp8.cpp  (+23, -19)

 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
 
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::f8_t;
 using ck::half_t;

@@ -24,33 +25,36 @@ TEST(FP8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to fp8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<f8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to fp8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal f8_t to float and check if equal to 240.0
-    ASSERT_NEAR(240.0f, type_convert<float>(type_convert<f8_t>(240.0f)), abs_tol);
+    ASSERT_NEAR(240.0f, type_convert<float>(f8_convert_rne<f8_t>(240.0f)), abs_tol);
     // convert maximal float to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(240.0f,
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to f8_t and check if it is qNan
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<f8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to fp8 and back, check if holds
     float pos_float = 0.017578125f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative norm float value to fp8 and back, check if holds
     float neg_float = -0.015625f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
     // positive subnorm float value to fp8 and back, check if holds
     pos_float = 0.00390625f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative subnorm float value to fp8 and back, check if holds
    neg_float = -0.001953125f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
 }
 
 TEST(FP8, ConvertFP32Stochastic)

@@ -92,33 +96,33 @@ TEST(FP8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to fp8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<f8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to fp8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal f8_t to fp16 and check if equal to 240.0
     ASSERT_NEAR(
-        half_t{240.0}, type_convert<half_t>(type_convert<f8_t>(half_t{240.0})), abs_tol);
+        half_t{240.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{240.0})), abs_tol);
     // convert maximal fp16 to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(half_t{240.0},
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to f8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to fp8 and back, check if holds
     half_t pos_half = half_t{0.017578125};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to fp8 and back, check if holds
     half_t neg_half = half_t{-0.015625};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to fp8 and back, check if holds
     pos_half = half_t{0.00390625};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to fp8 and back, check if holds
     neg_half = half_t{-0.001953125};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
 }
 
 TEST(FP8, ConvertFP16Stochastic)
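A quick aside (not part of the commit) on the two clipping constants these tests pin down, 240.0 for f8_t and 57344.0 for bf8_t. Assuming the FNUZ-style e4m3/e5m2 encodings that the 240.0 maximum suggests, the values follow directly from the bit layouts:

    # f8_t (e4m3, FNUZ-style with exponent bias 8): 3 mantissa bits, top exponent 2**7.
    f8_max  = (1 + 7 / 8) * 2 ** 7    # 1.875 * 128
    # bf8_t (e5m2): 2 mantissa bits, top usable exponent 2**15.
    bf8_max = (1 + 3 / 4) * 2 ** 15   # 1.75 * 32768
    print(f8_max, bf8_max)            # 240.0 57344.0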

test/gemm_universal/test_gemm_universal_xdl.cpp  (+7, -2)

@@ -44,17 +44,22 @@ class TestGemmUniversal_MK_NK
 using KernelTypes_MK_KN = ::testing::Types<
     // ADataType, BDataType, ComputeDataType, CDataType
     std::tuple<F16, F16, F16, F16>,
+#if (defined CK_ENABLE_FP8)
     std::tuple<F16, F8, F16, F16>,
     std::tuple<F8, F16, F16, F16>,
+    std::tuple<F8, F8, F8, BF16>,
+#endif
     std::tuple<BF16, BF16, BF16, BF16>>;
 
 using KernelTypes_MK_NK = ::testing::Types<
     // ADataType, BDataType, ComputeDataType, CDataType
     std::tuple<F16, F16, F16, F16>,
+#if (defined CK_ENABLE_FP8)
     std::tuple<F16, F8, F16, F16>,
     std::tuple<F8, F16, F16, F16>,
-    std::tuple<BF16, BF16, BF16, BF16>,
-    std::tuple<F8, F8, F8, BF16>>;
+    std::tuple<F8, F8, F8, BF16>,
+#endif
+    std::tuple<BF16, BF16, BF16, BF16>>;
 // clang-format on

test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp  (+10, -2)

@@ -66,6 +66,12 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
         {
             return true;
         }
+        // Skip due to the lack of kernels for NGCDHW
+        if constexpr(std::is_same_v<InLayout, NGCW> || std::is_same_v<InLayout, NGCHW> ||
+                     std::is_same_v<InLayout, NGCDHW>)
+        {
+            return true;
+        }
     }
     else
     {

@@ -139,7 +145,8 @@ using KernelTypes2d = ::testing::Types<
     std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
     std::tuple<float, float, float, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
     std::tuple<ck::half_t, ck::half_t, ck::half_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
-    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>>;
+    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NHWGC, GKYXC, NHWGK, ck::Number<2>>,
+    std::tuple<ck::half_t, ck::half_t, ck::half_t, NGCHW, GKYXC, NGKHW, ck::Number<2>>>;
 
 using KernelTypes3d = ::testing::Types<
     std::tuple<float, float, float, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,
     std::tuple<ck::half_t, ck::half_t, ck::half_t, GNDHWC, GKZYXC, GNDHWK, ck::Number<3>>,

@@ -148,7 +155,8 @@ using KernelTypes3d = ::testing::Types<
     std::tuple<float, float, float, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
     std::tuple<ck::half_t, ck::half_t, ck::half_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
     std::tuple<ck::bhalf_t, float, ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
-    std::tuple<int8_t, int8_t, int8_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>>;
+    std::tuple<int8_t, int8_t, int8_t, NDHWGC, GKZYXC, NDHWGK, ck::Number<3>>,
+    std::tuple<ck::half_t, ck::half_t, ck::half_t, NGCDHW, GKZYXC, NGKDHW, ck::Number<3>>>;
 
 TYPED_TEST_SUITE(TestGroupedConvndBwdWeight1d, KernelTypes1d);
 TYPED_TEST_SUITE(TestGroupedConvndBwdWeight2d, KernelTypes2d);

test/grouped_convnd_fwd/CMakeLists.txt  (+6, -0)

@@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11")
     endif()
 endif()
+if(GPU_TARGETS MATCHES "gfx9")
+    add_executable(test_grouped_convnd_fwd_large_cases_xdl test_grouped_convnd_fwd_large_cases_xdl.cpp)
+    target_compile_options(test_grouped_convnd_fwd_large_cases_xdl PRIVATE -Wno-global-constructors -Wno-undef)
+    target_link_libraries(test_grouped_convnd_fwd_large_cases_xdl
+        PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
+endif()
 add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp)
 if(result EQUAL 0)
     target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility)

test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp  (+23, -46)

@@ -17,7 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test
     using InLayout  = std::tuple_element_t<1, Tuple>;
     using WeiLayout = std::tuple_element_t<2, Tuple>;
     using OutLayout = std::tuple_element_t<3, Tuple>;
-    using IndexType = std::tuple_element_t<4, Tuple>;
+    using IndexType = ck::index_t;
 
     std::vector<ck::utils::conv::ConvParam> conv_params;

@@ -50,31 +50,28 @@ class TestGroupedConvndFwd : public ::testing::Test
 using namespace ck::tensor_layout::convolution;
 
-using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK, ck::index_t>,
-                                       std::tuple<ck::half_t, GNWC, GKXC, GNWK, ck::index_t>,
-                                       std::tuple<ck::bhalf_t, GNWC, GKXC, GNWK, ck::index_t>,
-                                       std::tuple<int8_t, GNWC, GKXC, GNWK, ck::index_t>>;
-using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK, ck::index_t>,
-                                       std::tuple<ck::half_t, GNHWC, GKYXC, GNHWK, ck::index_t>,
-                                       std::tuple<ck::bhalf_t, GNHWC, GKYXC, GNHWK, ck::index_t>,
-                                       std::tuple<int8_t, GNHWC, GKYXC, GNHWK, ck::index_t>,
-                                       std::tuple<float, NHWGC, GKYXC, NHWGK, ck::index_t>,
-                                       std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK, ck::index_t>,
-                                       std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK, ck::index_t>,
-                                       std::tuple<int8_t, NHWGC, GKYXC, NHWGK, ck::index_t>>;
-using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK, ck::index_t>,
-                                       std::tuple<ck::half_t, GNDHWC, GKZYXC, GNDHWK, ck::index_t>,
-                                       std::tuple<ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK, ck::index_t>,
-                                       std::tuple<int8_t, GNDHWC, GKZYXC, GNDHWK, ck::index_t>,
-                                       std::tuple<float, NDHWGC, GKZYXC, NDHWGK, ck::index_t>,
-                                       std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK, ck::index_t>,
-                                       std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK, ck::index_t>,
-                                       std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK, ck::index_t>>;
-using KernelTypes2dLargeCases =
-    ::testing::Types<std::tuple<float, NHWGC, GKYXC, NHWGK, ck::long_index_t>>;
+using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK>,
+                                       std::tuple<ck::half_t, GNWC, GKXC, GNWK>,
+                                       std::tuple<ck::bhalf_t, GNWC, GKXC, GNWK>,
+                                       std::tuple<int8_t, GNWC, GKXC, GNWK>>;
+using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<ck::half_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<ck::bhalf_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<int8_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<float, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<int8_t, NHWGC, GKYXC, NHWGK>>;
+using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<ck::half_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<int8_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>;
 
 template <typename Tuple>
 class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple>

@@ -91,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
 {
 };
 
-template <typename Tuple>
-class TestGroupedConvndFwd2dLargeCases : public TestGroupedConvndFwd<Tuple>
-{
-};
-
 TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d);
 TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
 TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
-TYPED_TEST_SUITE(TestGroupedConvndFwd2dLargeCases, KernelTypes2dLargeCases);
 
 TYPED_TEST(TestGroupedConvndFwd1d, Test1D)
 {

@@ -149,17 +140,3 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
         {3, 96, 1, 1, 1, {3, 3, 3}, {4, 30, 160}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
     this->template Run<3>();
 }
-
-TYPED_TEST(TestGroupedConvndFwd2dLargeCases, Test2DLargeCases)
-{
-    // Case larger than 2GB
-    this->conv_params.push_back(
-        {2, 1, 64, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}});
-    // With supported NumGroupsToMerge > 1
-    this->conv_params.push_back(
-        {2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}});
-    // When image is larger than 2GB
-    this->conv_params.push_back(
-        {2, 1, 1, 256, 256, {3, 3}, {4096, 2048}, {1024, 1024}, {3, 3}, {1, 1}, {1, 1}});
-    this->template Run<2>();
-}

test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp  (new file, mode 100644)  (+127, -0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>

#include <gtest/gtest.h>

#include "profiler/profile_grouped_conv_fwd_impl.hpp"

template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
    protected:
    using DataType  = std::tuple_element_t<0, Tuple>;
    using InLayout  = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using OutLayout = std::tuple_element_t<3, Tuple>;
    using IndexType = ck::long_index_t;

    std::vector<ck::utils::conv::ConvParam> conv_params;

    template <ck::index_t NDimSpatial>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());
        bool pass = true;
        for(auto& param : conv_params)
        {
            pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
                                                                       InLayout,
                                                                       WeiLayout,
                                                                       OutLayout,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       IndexType>(
                               true,  // do_verification
                               1,     // init_method: integer value
                               false, // do_log
                               false, // time_kernel
                               param);
        }
        EXPECT_TRUE(pass);
    }
};

using namespace ck::tensor_layout::convolution;

using KernelTypes2d = ::testing::Types<std::tuple<float, NHWGC, GKYXC, NHWGK>,
                                       std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
                                       std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
                                       std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
                                       std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>>;

template <typename Tuple>
class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
{
};

template <typename Tuple>
class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};

TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);

TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
{
    // Case larger than 2GB
    this->conv_params.push_back(
        {2, 1, 128, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}});
    // With supported NumGroupsToMerge > 1
    this->conv_params.push_back(
        {2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}});
    // When image is larger than 2GB
    this->conv_params.push_back(
        {2, 2, 2, 128, 128, {3, 3}, {4096, 2048}, {300, 300}, {3, 3}, {1, 1}, {1, 1}});
    this->template Run<2>();
}

TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
    // Case larger than 2GB
    this->conv_params.push_back(
        {3, 1, 128, 4, 192, {2, 2, 2}, {2, 224, 224}, {1, 224, 224}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    // With supported NumGroupsToMerge > 1
    this->conv_params.push_back(
        {3, 32, 64, 1, 1, {2, 2, 2}, {360, 2, 672}, {360, 2, 672}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    // When image is larger than 2GB
    this->conv_params.push_back(
        {3, 1, 2, 128, 128, {3, 1, 3}, {900, 2, 2048}, {300, 1, 300}, {3, 2, 3}, {1, 1, 1}, {1, 1, 1}});
    this->template Run<3>();
}
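A rough size check (not part of the commit) for the first 2-D "larger than 2GB" case above, assuming ConvParam's leading arguments are (ndim, G, N, K, C) followed by filter and input spatial lengths: the input tensor alone needs more bytes than a signed 32-bit offset can address even in fp16, which is why this test fixes IndexType to ck::long_index_t while the regular test keeps ck::index_t.

    # Input tensor of the first Test2D case: G=1, N=128, C=192, Hi=Wi=224.
    G, N, C = 1, 128, 192
    Hi = Wi = 224
    elements   = G * N * C * Hi * Wi    # 1,233,125,376 elements
    bytes_fp16 = elements * 2           # 2,466,250,752 bytes
    print(bytes_fp16 > 2**31 - 1)       # True: past the 2 GB / int32 offset limit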