Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
f6ceef78
Commit
f6ceef78
authored
Aug 26, 2024
by
ThomasNing
Browse files
merge with the develop branch
parents
536c5458
25935b57
Changes
240
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
724 additions
and
651 deletions
+724
-651
script/process_qa_data.sh
script/process_qa_data.sh
+20
-2
script/profile_grouped_conv_bwd_data.sh
script/profile_grouped_conv_bwd_data.sh
+0
-0
script/profile_grouped_conv_bwd_weight.sh
script/profile_grouped_conv_bwd_weight.sh
+22
-21
script/profile_grouped_conv_fwd.sh
script/profile_grouped_conv_fwd.sh
+39
-0
script/run_full_performance_tests.sh
script/run_full_performance_tests.sh
+20
-14
script/run_performance_tests.sh
script/run_performance_tests.sh
+15
-0
script/test_reduce_with_index.sh
script/test_reduce_with_index.sh
+0
-63
test/CMakeLists.txt
test/CMakeLists.txt
+4
-4
test/conv_util/conv_util.cpp
test/conv_util/conv_util.cpp
+29
-27
test/data_type/CMakeLists.txt
test/data_type/CMakeLists.txt
+10
-0
test/data_type/test_bf8.cpp
test/data_type/test_bf8.cpp
+23
-19
test/data_type/test_fp8.cpp
test/data_type/test_fp8.cpp
+23
-19
test/gemm_universal/test_gemm_universal_xdl.cpp
test/gemm_universal/test_gemm_universal_xdl.cpp
+7
-2
test/grouped_convnd_fwd/CMakeLists.txt
test/grouped_convnd_fwd/CMakeLists.txt
+6
-0
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+5
-20
test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp
...ed_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp
+127
-0
test/reduce/CMakeLists.txt
test/reduce/CMakeLists.txt
+2
-2
test/reduce/reduce_no_index.cpp
test/reduce/reduce_no_index.cpp
+168
-213
test/reduce/reduce_with_index.cpp
test/reduce/reduce_with_index.cpp
+168
-213
test/smfmac_op/smfmac_op_xdl.cpp
test/smfmac_op/smfmac_op_xdl.cpp
+36
-32
No files found.
script/process_qa_data.sh
View file @
f6ceef78
...
...
@@ -15,9 +15,27 @@ python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N4.log
python3 process_perf_data.py perf_batched_gemm.log
python3 process_perf_data.py perf_grouped_gemm.log
python3 process_perf_data.py perf_conv_fwd.log
python3 process_perf_data.py perf_conv_bwd_data.log
python3 process_perf_data.py perf_grouped_conv_fwd.log
python3 process_perf_data.py perf_grouped_conv_bwd_data.log
python3 process_perf_data.py perf_grouped_conv_bwd_weight.log
python3 process_perf_data.py perf_gemm_bilinear.log
python3 process_perf_data.py perf_reduction.log
python3 process_perf_data.py perf_splitK_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
file
=
./perf_fmha_fwd_gfx942.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
fi
file
=
./perf_fmha_bwd_gfx942.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_bwd_gfx942.log
fi
file
=
./perf_fmha_fwd_gfx90a.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_fwd_gfx90a.log
fi
file
=
./perf_fmha_bwd_gfx90a.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_bwd_gfx90a.log
fi
script/profile_conv_bwd_data.sh
→
script/profile_
grouped_
conv_bwd_data.sh
View file @
f6ceef78
File moved
script/profile_conv_
f
wd.sh
→
script/profile_
grouped_
conv_
b
wd
_weight
.sh
View file @
f6ceef78
...
...
@@ -12,27 +12,28 @@ INIT=$5
LOG
=
$6
TIME
=
$7
N
=
$8
N
=
$8
SplitK
=
$9
# Resnet50
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 3 7 7 224 224 2 2 1 1 3 3 3 3
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 28 28 1 1 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 128 1 1 28 28 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 56 56 2 2 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 14 14 1 1 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 28 28 2 2 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 256 1 1 56 56 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 256 1 1 56 56 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 14 14 2 2 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 512 1 1 28 28 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 512 1 1 28 28 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 7 7 1 1 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 64 1 1 56 56 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 1 1 56 56 1 1 1 1 0 0 0 0
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 3 3 56 56 1 1 1 1 1 1 1 1
$SplitK
$DRIVER
$OP
$DATATYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 3 7 7 224 224 2 2 1 1 3 3 3 3
$SplitK
script/profile_grouped_conv_fwd.sh
0 → 100755
View file @
f6ceef78
#!/bin/bash
## GPU visibility
export
HIP_VISIBLE_DEVICES
=
0
DRIVER
=
"../build/bin/ckProfiler"
OP
=
$1
DATATYPE
=
$2
LAYOUT
=
$3
INDEXTYPE
=
$4
VERIFY
=
$5
INIT
=
$6
LOG
=
$7
TIME
=
$8
N
=
$9
# Resnet50
######## op datatype indextype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER
$OP
$DATATYPE
$INDEXTYPE
$LAYOUT
$VERIFY
$INIT
$LOG
$TIME
2 1
$N
64 3 7 7 224 224 2 2 1 1 3 3 3 3
script/run_full_performance_tests.sh
View file @
f6ceef78
...
...
@@ -90,21 +90,27 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 2
$verify
1 0 1 2>&1 |
tee
-a
$gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 3
$verify
1 0 1 2>&1 |
tee
-a
$gemm_bilinear_log
#run conv_fwd tests
export
conv_fwd_log
=
"perf_conv_fwd.log"
print_log_header
$conv_fwd_log
$env_type
$branch
$host_name
./profile_conv_fwd.sh conv_fwd 0 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_fwd_log
./profile_conv_fwd.sh conv_fwd 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_fwd_log
./profile_conv_fwd.sh conv_fwd 2 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_fwd_log
./profile_conv_fwd.sh conv_fwd 3 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_fwd_log
#run grouped_fwd tests
export
grouped_conv_fwd_log
=
"perf_grouped_conv_fwd.log"
print_log_header
$grouped_conv_fwd_log
$env_type
$branch
$host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
./profile_grouped_conv_fwd.sh grouped_conv_fwd 2 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
#run conv_bwd_data tests
export
conv_bwd_data_log
=
"perf_conv_bwd_data.log"
print_log_header
$conv_bwd_data_log
$env_type
$branch
$host_name
./profile_conv_bwd_data.sh conv_bwd_data 0 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 2 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_bwd_data_log
./profile_conv_bwd_data.sh conv_bwd_data 3 1
$verify
1 0 1 256 2>&1 |
tee
-a
$conv_bwd_data_log
#run grouped_bwd_data tests
export
grouped_conv_bwd_data_log
=
"perf_grouped_conv_bwd_data.log"
print_log_header
$grouped_conv_bwd_data_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
#run grouped_bwd_weight tests
export
grouped_conv_bwd_weight_log
=
"perf_grouped_conv_bwd_weight.log"
print_log_header
$grouped_conv_bwd_weight_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 0 2
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 2 2
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2
$verify
1 0 1 256 4 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
#run resnet50 tests
export
resnet256_log
=
"perf_resnet50_N256.log"
...
...
script/run_performance_tests.sh
View file @
f6ceef78
...
...
@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 2 3
$verify
1 0 1 |
tee
-a
$gemm_log
./profile_gemm.sh gemm 3 3
$verify
1 0 1 |
tee
-a
$gemm_log
#run grouped_fwd fp16 tests
export
grouped_conv_fwd_log
=
"perf_grouped_conv_fwd_fp16.log"
print_log_header
$conv_fwd_log
$env_type
$branch
$host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
#run grouped_bwd_data fp16 tests
export
grouped_conv_bwd_data_log
=
"perf_grouped_conv_bwd_data_fp16.log"
print_log_header
$grouped_conv_bwd_data_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
#run grouped_bwd_weight fp16 tests
export
grouped_conv_bwd_weight_log
=
"perf_grouped_conv_bwd_weight_fp16.log"
print_log_header
$grouped_conv_bwd_weight_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
#run resnet50 tests
export
resnet256_log
=
"perf_resnet50_N256.log"
print_log_header
$resnet256_log
$env_type
$branch
$host_name
...
...
script/test_reduce_with_index.sh
deleted
100755 → 0
View file @
536c5458
#!/bin/bash
## The following will be used for CI
set
-x
## for float
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2,3 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,3 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,2,3 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1,2,3 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
2 0 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
3 0 2
## for float64
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2,3 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,3 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,2,3 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1,2,3 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
2 6 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
3 6 2
## for float16
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2,3 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,3 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,2,3 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1,2,3 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
2 1 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
3 1 2
## for int8_t
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2,3 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,3 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,2,3 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1,2,3 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
2 3 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
3 3 2
## for bfloat16
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2,3 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,2 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,1,3 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0,2,3 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1,2,3 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
0 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
1 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
2 5 2
bin/test_reduce_with_index
-D
64,4,280,82
-R
3 5 2
set
+x
test/CMakeLists.txt
View file @
f6ceef78
...
...
@@ -68,11 +68,11 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103
gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a
gfx1200 gfx1201
)
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
...
@@ -149,11 +149,11 @@ function(add_gtest_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
if
(
ARGN MATCHES
"_xdl"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103
gfx1200 gfx1201
)
elseif
(
ARGN MATCHES
"_wmma"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030
)
elseif
(
ARGN MATCHES
"_smfmac"
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a
)
list
(
REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a
gfx1200 gfx1201
)
endif
()
set_source_files_properties
(
${
ARGN
}
PROPERTIES LANGUAGE HIP
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
...
...
test/conv_util/conv_util.cpp
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <string>
...
...
@@ -24,12 +24,12 @@ class TestConvUtil : public ::testing::Test
128
,
192
,
256
,
std
::
vector
<
ck
::
index_t
>
(
ndims
,
3
),
std
::
vector
<
ck
::
index_t
>
(
ndims
,
71
),
std
::
vector
<
ck
::
index_t
>
(
ndims
,
s
),
std
::
vector
<
ck
::
index_t
>
(
ndims
,
d
),
std
::
vector
<
ck
::
index_t
>
(
ndims
,
p
),
std
::
vector
<
ck
::
index_t
>
(
ndims
,
p
));
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
3
),
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
71
),
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
s
),
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
d
),
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
p
),
std
::
vector
<
ck
::
long_
index_t
>
(
ndims
,
p
));
}
protected:
...
...
@@ -48,35 +48,35 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
{
// stride 2, dilation 1, pad 1
SetNDParams
(
1
,
2
,
1
,
1
);
std
::
vector
<
ck
::
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
std
::
vector
<
ck
::
long_
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
},
"Error: ConvParams 1D."
));
out_spatial_len
,
std
::
vector
<
ck
::
long_
index_t
>
{
36
},
"Error: ConvParams 1D."
));
// stride 1, dilation 1, pad 1
SetNDParams
(
1
,
1
,
1
,
1
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
71
},
"Error: ConvParams 1D stride {1}."
));
out_spatial_len
,
std
::
vector
<
ck
::
long_
index_t
>
{
71
},
"Error: ConvParams 1D stride {1}."
));
// stride 2, dilation 1, pad 2
SetNDParams
(
1
,
2
,
1
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
37
},
std
::
vector
<
ck
::
long_
index_t
>
{
37
},
"Error: ConvParams 1D padding left/right {2}."
));
// stride 2, dilation 2, pad 2
SetNDParams
(
1
,
2
,
2
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
},
"Error: ConvParams 1D dilation {2}."
));
out_spatial_len
,
std
::
vector
<
ck
::
long_
index_t
>
{
36
},
"Error: ConvParams 1D dilation {2}."
));
// stride 3, dilation 2, pad 1
SetNDParams
(
1
,
3
,
2
,
1
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
23
},
std
::
vector
<
ck
::
long_
index_t
>
{
23
},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."
));
}
...
...
@@ -84,36 +84,38 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{
// stride 2, dilation 1, pad 1
SetNDParams
(
2
,
2
,
1
,
1
);
std
::
vector
<
ck
::
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
std
::
vector
<
ck
::
long_
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
,
36
},
std
::
vector
<
ck
::
long_
index_t
>
{
36
,
36
},
"Error: ConvParams 2D default constructor."
));
// stride 1, dilation 1, pad 1
SetNDParams
(
2
,
1
,
1
,
1
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
71
,
71
},
"Error: ConvParams 2D stride {1,1}."
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
long_index_t
>
{
71
,
71
},
"Error: ConvParams 2D stride {1,1}."
));
// stride 2, dilation 1, pad 2
SetNDParams
(
2
,
2
,
1
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
37
,
37
},
std
::
vector
<
ck
::
long_
index_t
>
{
37
,
37
},
"Error: ConvParams 2D padding left/right {2,2}."
));
// stride 2, dilation 2, pad 2
SetNDParams
(
2
,
2
,
2
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
,
36
},
"Error: ConvParams 2D dilation {2,2}."
));
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
long_index_t
>
{
36
,
36
},
"Error: ConvParams 2D dilation {2,2}."
));
// stride 3, dilation 2, pad 1
SetNDParams
(
2
,
3
,
2
,
1
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
23
,
23
},
std
::
vector
<
ck
::
long_
index_t
>
{
23
,
23
},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."
));
}
...
...
@@ -121,29 +123,29 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
{
// stride 2, dilation 1, pad 1
SetNDParams
(
3
,
2
,
1
,
1
);
std
::
vector
<
ck
::
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
std
::
vector
<
ck
::
long_
index_t
>
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
,
36
,
36
},
"Error: ConvParams 3D."
));
out_spatial_len
,
std
::
vector
<
ck
::
long_
index_t
>
{
36
,
36
,
36
},
"Error: ConvParams 3D."
));
// stride 1, dilation 1, pad 1
SetNDParams
(
3
,
1
,
1
,
1
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
71
,
71
,
71
},
std
::
vector
<
ck
::
long_
index_t
>
{
71
,
71
,
71
},
"Error: ConvParams 3D stride {1, 1, 1}."
));
// stride 2, dilation 1, pad 2
SetNDParams
(
3
,
2
,
1
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
37
,
37
,
37
},
std
::
vector
<
ck
::
long_
index_t
>
{
37
,
37
,
37
},
"Error: ConvParams 3D padding left/right {2, 2, 2}."
));
// stride 2, dilation 2, pad 2
SetNDParams
(
3
,
2
,
2
,
2
);
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
36
,
36
,
36
},
std
::
vector
<
ck
::
long_
index_t
>
{
36
,
36
,
36
},
"Error: ConvParams 3D dilation {2, 2, 2}."
));
// stride 3, dilation 2, pad 1
...
...
@@ -151,6 +153,6 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
out_spatial_len
=
conv_params
.
GetOutputSpatialLengths
();
EXPECT_TRUE
(
ck
::
utils
::
check_err
(
out_spatial_len
,
std
::
vector
<
ck
::
index_t
>
{
23
,
23
,
23
},
std
::
vector
<
ck
::
long_
index_t
>
{
23
,
23
,
23
},
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."
));
}
test/data_type/CMakeLists.txt
View file @
f6ceef78
if
(
GPU_TARGETS
)
if
(
GPU_TARGETS MATCHES
"gfx10"
OR GPU_TARGETS MATCHES
"gfx11"
OR GPU_TARGETS MATCHES
"gfx12"
)
add_definitions
(
-DCK_SKIP_FLAKY_F8_TEST
)
set
(
CK_SKIP_FLAKY_F8_TEST
"ON"
)
endif
()
else
()
add_definitions
(
-DCK_SKIP_FLAKY_F8_TEST
)
set
(
CK_SKIP_FLAKY_F8_TEST
"ON"
)
endif
()
if
(
USE_BITINT_EXTENSION_INT4
)
add_gtest_executable
(
test_int4 test_int4.cpp
)
if
(
result EQUAL 0
)
...
...
test/data_type/test_bf8.cpp
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
bf8_t
;
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
half_t
;
using
ck
::
type_convert
;
...
...
@@ -24,33 +25,36 @@ TEST(BF8, ConvertFP32Nearest)
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to bf8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
type_convert
<
bf8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
bf8_t
>
(
0.0
f
)),
abs_tol
);
// don't run the next test on gfx11 devices
#ifndef CK_SKIP_FLAKY_F8_TEST
// convert minimal float to bf8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
#endif
// convert maximal bf8_t to float and check if equal to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
57344.0
f
)),
abs_tol
);
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
57344.0
f
)),
abs_tol
);
// convert maximal float to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
57344.0
f
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to bf8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
type
_convert
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
f8
_convert
_rne
<
bf8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to bf8 and back, check if holds
float
pos_float
=
0.0000762939
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to bf8 and back, check if holds
float
neg_float
=
-
0.0000610351
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to bf8 and back, check if holds
pos_float
=
0.0000305175
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to bf8 and back, check if holds
neg_float
=
-
0.0000152587
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
type
_convert
<
bf8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
bf8_t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
BF8
,
ConvertFP32Stochastic
)
...
...
@@ -92,34 +96,34 @@ TEST(BF8, ConvertFP16Nearest)
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to bf8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal bf8_t to fp16 and check if equal to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
},
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
half_t
{
57344.0
})),
abs_tol
);
half_t
{
57344.0
},
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
half_t
{
57344.0
})),
abs_tol
);
// convert maximal fp16 to bf8 and back, check if clipped to 57344.0
ASSERT_NEAR
(
half_t
{
57344.0
},
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
bf8_t
>
(
0x80
),
type
_convert
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
f8
_convert
_rne
<
bf8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to bf8 and back, check if holds
half_t
pos_half
=
half_t
{
0.0000762939
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to bf8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.0000610351
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to bf8 and back, check if holds
pos_half
=
half_t
{
0.0000305175
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to bf8 and back, check if holds
neg_half
=
half_t
{
-
0.0000152587
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
type
_convert
<
bf8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
bf8_t
>
(
neg_half
)),
abs_tol
);
}
TEST
(
BF8
,
ConvertFP16Stochastic
)
...
...
test/data_type/test_fp8.cpp
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using
ck
::
f8_convert_rne
;
using
ck
::
f8_convert_sr
;
using
ck
::
f8_t
;
using
ck
::
half_t
;
...
...
@@ -24,33 +25,36 @@ TEST(FP8, ConvertFP32Nearest)
// fix the tolerance value
float
abs_tol
=
1e-6
;
// convert 0 float to fp8 and back, check if holds
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
type_convert
<
f8_t
>
(
0.0
f
)),
abs_tol
);
ASSERT_NEAR
(
0.0
f
,
type_convert
<
float
>
(
f8_convert_rne
<
f8_t
>
(
0.0
f
)),
abs_tol
);
// don't run the next test on gfx11 devices
#ifndef CK_SKIP_FLAKY_F8_TEST
// convert minimal float to fp8 and back, check if holds
ASSERT_NEAR
(
std
::
numeric_limits
<
float
>::
min
(),
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
min
())),
abs_tol
);
#endif
// convert maximal f8_t to float and check if equal to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
240.0
f
)),
abs_tol
);
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
240.0
f
)),
abs_tol
);
// convert maximal float to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
240.0
f
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
max
())),
abs_tol
);
// convert inf float to f8_t and check if it is qNan
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
type
_convert
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
f8
_convert
_rne
<
f8_t
>
(
std
::
numeric_limits
<
float
>::
infinity
()),
abs_tol
);
// positive norm float value to fp8 and back, check if holds
float
pos_float
=
0.017578125
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
pos_float
)),
abs_tol
);
// negative norm float value to fp8 and back, check if holds
float
neg_float
=
-
0.015625
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
neg_float
)),
abs_tol
);
// positive subnorm float value to fp8 and back, check if holds
pos_float
=
0.00390625
f
;
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
pos_float
)),
abs_tol
);
ASSERT_NEAR
(
pos_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
pos_float
)),
abs_tol
);
// negative subnorm float value to fp8 and back, check if holds
neg_float
=
-
0.001953125
f
;
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
type
_convert
<
f8_t
>
(
neg_float
)),
abs_tol
);
ASSERT_NEAR
(
neg_float
,
type_convert
<
float
>
(
f8
_convert
_rne
<
f8_t
>
(
neg_float
)),
abs_tol
);
}
TEST
(
FP8
,
ConvertFP32Stochastic
)
...
...
@@ -92,33 +96,33 @@ TEST(FP8, ConvertFP16Nearest)
// fix the tolerance value
float
abs_tol
=
1e-3
;
// convert 0 fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
half_t
{
0.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
0.0
},
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
half_t
{
0.0
})),
abs_tol
);
// convert minimal fp16 to fp8 and back, check if holds
ASSERT_NEAR
(
ck
::
NumericLimits
<
half_t
>::
Min
(),
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Min
())),
abs_tol
);
// convert maximal f8_t to fp16 and check if equal to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
half_t
{
240.0
})),
abs_tol
);
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
half_t
{
240.0
})),
abs_tol
);
// convert maximal fp16 to fp8 and back, check if clipped to 240.0
ASSERT_NEAR
(
half_t
{
240.0
},
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
Max
())),
abs_tol
);
// convert QuietNaN fp16 to f8_t and check if it is QuietNaN
ASSERT_NEAR
(
type_convert
<
f8_t
>
(
0x80
),
type
_convert
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
f8
_convert
_rne
<
f8_t
>
(
ck
::
NumericLimits
<
half_t
>::
QuietNaN
()),
abs_tol
);
// positive norm fp16 value to fp8 and back, check if holds
half_t
pos_half
=
half_t
{
0.017578125
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
pos_half
)),
abs_tol
);
// negative norm fp16 value to fp8 and back, check if holds
half_t
neg_half
=
half_t
{
-
0.015625
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
neg_half
)),
abs_tol
);
// positive subnorm fp16 value to fp8 and back, check if holds
pos_half
=
half_t
{
0.00390625
};
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
pos_half
)),
abs_tol
);
ASSERT_NEAR
(
pos_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
pos_half
)),
abs_tol
);
// negative subnorm fp16 value to fp8 and back, check if holds
neg_half
=
half_t
{
-
0.001953125
};
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
type
_convert
<
f8_t
>
(
neg_half
)),
abs_tol
);
ASSERT_NEAR
(
neg_half
,
type_convert
<
half_t
>
(
f8
_convert
_rne
<
f8_t
>
(
neg_half
)),
abs_tol
);
}
TEST
(
FP8
,
ConvertFP16Stochastic
)
...
...
test/gemm_universal/test_gemm_universal_xdl.cpp
View file @
f6ceef78
...
...
@@ -44,17 +44,22 @@ class TestGemmUniversal_MK_NK
using
KernelTypes_MK_KN
=
::
testing
::
Types
<
// ADataType, BDataType, ComputeDataType, CDataType
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
#if (defined CK_ENABLE_FP8)
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
#endif
std
::
tuple
<
BF16
,
BF16
,
BF16
,
BF16
>
>
;
using
KernelTypes_MK_NK
=
::
testing
::
Types
<
// ADataType, BDataType, ComputeDataType, CDataType
std
::
tuple
<
F16
,
F16
,
F16
,
F16
>
,
#if (defined CK_ENABLE_FP8)
std
::
tuple
<
F16
,
F8
,
F16
,
F16
>
,
std
::
tuple
<
F8
,
F16
,
F16
,
F16
>
,
std
::
tuple
<
BF16
,
BF16
,
BF16
,
BF16
>
,
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
std
::
tuple
<
F8
,
F8
,
F8
,
BF16
>
,
#endif
std
::
tuple
<
BF16
,
BF16
,
BF16
,
BF16
>
>
;
// clang-format on
...
...
test/grouped_convnd_fwd/CMakeLists.txt
View file @
f6ceef78
...
...
@@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11")
endif
()
endif
()
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_executable
(
test_grouped_convnd_fwd_large_cases_xdl test_grouped_convnd_fwd_large_cases_xdl.cpp
)
target_compile_options
(
test_grouped_convnd_fwd_large_cases_xdl PRIVATE -Wno-global-constructors -Wno-undef
)
target_link_libraries
(
test_grouped_convnd_fwd_large_cases_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance
)
endif
()
add_gtest_executable
(
test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility
)
...
...
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
View file @
f6ceef78
...
...
@@ -17,6 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test
using
InLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
WeiLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
OutLayout
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
IndexType
=
ck
::
index_t
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParam
>
conv_params
;
...
...
@@ -33,7 +34,10 @@ class TestGroupedConvndFwd : public ::testing::Test
OutLayout
,
DataType
,
DataType
,
DataType
>
(
DataType
,
DataType
,
DataType
,
IndexType
>
(
true
,
// do_verification
1
,
// init_method: integer value
false
,
// do_log
...
...
@@ -69,8 +73,6 @@ using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGC
,
GKZYXC
,
NDHWGK
>
,
std
::
tuple
<
int8_t
,
NDHWGC
,
GKZYXC
,
NDHWGK
>>
;
using
KernelTypes2dLargeCases
=
::
testing
::
Types
<
std
::
tuple
<
float
,
NHWGC
,
GKYXC
,
NHWGK
>>
;
template
<
typename
Tuple
>
class
TestGroupedConvndFwd1d
:
public
TestGroupedConvndFwd
<
Tuple
>
{
...
...
@@ -86,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};
template
<
typename
Tuple
>
class
TestGroupedConvndFwd2dLargeCases
:
public
TestGroupedConvndFwd
<
Tuple
>
{
};
TYPED_TEST_SUITE
(
TestGroupedConvndFwd1d
,
KernelTypes1d
);
TYPED_TEST_SUITE
(
TestGroupedConvndFwd2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndFwd3d
,
KernelTypes3d
);
TYPED_TEST_SUITE
(
TestGroupedConvndFwd2dLargeCases
,
KernelTypes2dLargeCases
);
TYPED_TEST
(
TestGroupedConvndFwd1d
,
Test1D
)
{
...
...
@@ -144,14 +140,3 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
3
,
96
,
1
,
1
,
1
,
{
3
,
3
,
3
},
{
4
,
30
,
160
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
template
Run
<
3
>();
}
TYPED_TEST
(
TestGroupedConvndFwd2dLargeCases
,
Test2DLargeCases
)
{
// Case larger than 2GB
this
->
conv_params
.
push_back
(
{
2
,
1
,
64
,
4
,
192
,
{
2
,
2
},
{
224
,
224
},
{
224
,
224
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
// With supported NumGroupsToMerge > 1
this
->
conv_params
.
push_back
(
{
2
,
32
,
64
,
1
,
1
,
{
2
,
2
},
{
672
,
672
},
{
672
,
672
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
test/grouped_convnd_fwd/test_grouped_convnd_fwd_large_cases_xdl.cpp
0 → 100644
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
/// Typed-test fixture for "large case" grouped convolution forward tests.
/// Unlike the regular grouped-conv fixture, this one uses ck::long_index_t as
/// the index type — presumably to address tensors larger than 2 GB (the test
/// cases below are commented as such); TODO confirm against
/// profile_grouped_conv_fwd_impl's index-type requirements.
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
    protected:
    // Tuple layout: <DataType, InLayout, WeiLayout, OutLayout>
    using DataType  = std::tuple_element_t<0, Tuple>;
    using InLayout  = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using OutLayout = std::tuple_element_t<3, Tuple>;
    // 64-bit index type for the large-tensor cases.
    using IndexType = ck::long_index_t;

    // Convolution problem descriptions; each TYPED_TEST fills this before Run().
    std::vector<ck::utils::conv::ConvParam> conv_params;

    /// Runs the profiler (with verification enabled) over every queued
    /// convolution parameter set and expects all of them to pass.
    /// @tparam NDimSpatial number of spatial dimensions (2 or 3 here).
    template <ck::index_t NDimSpatial>
    void Run()
    {
        // Guard against a test that forgot to queue any cases.
        EXPECT_FALSE(conv_params.empty());

        bool pass = true;
        for(auto& param : conv_params)
        {
            // A/B/C/compute/reference all use the same DataType.
            pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
                                                                       InLayout,
                                                                       WeiLayout,
                                                                       OutLayout,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       DataType,
                                                                       IndexType>(
                true,  // do_verification
                1,     // init_method: integer value
                false, // do_log
                false, // time_kernel
                param);
        }
        EXPECT_TRUE(pass);
    }
};
using
namespace
ck
::
tensor_layout
::
convolution
;
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGC
,
GKYXC
,
NHWGK
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
NDHWGC
,
GKZYXC
,
NDHWGK
>
,
std
::
tuple
<
ck
::
half_t
,
NDHWGC
,
GKZYXC
,
NDHWGK
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGC
,
GKZYXC
,
NDHWGK
>>
;
// Dimension-specific fixture aliases: empty subclasses exist only so each
// spatial rank gets its own typed-test suite with its own kernel type list.
template <typename Tuple>
class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
{
};

template <typename Tuple>
class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};

// Instantiate the suites over the float / half / bhalf layout tuples above.
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
// 2-D grouped conv forward, large-case coverage.
// ConvParam fields (in order): NDimSpatial, G, N, K, C, filter lengths,
// input spatial lengths, output spatial lengths, strides, left pads, right
// pads — presumably matching ck::utils::conv::ConvParam's constructor; TODO
// confirm against conv_util.
TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
{
    // Case larger than 2GB
    this->conv_params.push_back(
        {2, 1, 128, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}});
    // With supported NumGroupsToMerge > 1
    this->conv_params.push_back(
        {2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}});
    // When image is larger than 2GB
    this->conv_params.push_back(
        {2, 2, 2, 128, 128, {3, 3}, {4096, 2048}, {300, 300}, {3, 3}, {1, 1}, {1, 1}});
    this->template Run<2>();
}
// 3-D grouped conv forward, large-case coverage (same ConvParam field order
// as the 2-D test, with three spatial entries per list).
TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
    // Case larger than 2GB
    this->conv_params.push_back({3,
                                 1,
                                 128,
                                 4,
                                 192,
                                 {2, 2, 2},
                                 {2, 224, 224},
                                 {1, 224, 224},
                                 {1, 1, 1},
                                 {0, 0, 0},
                                 {0, 0, 0}});
    // With supported NumGroupsToMerge > 1
    this->conv_params.push_back({3,
                                 32,
                                 64,
                                 1,
                                 1,
                                 {2, 2, 2},
                                 {360, 2, 672},
                                 {360, 2, 672},
                                 {1, 1, 1},
                                 {0, 0, 0},
                                 {0, 0, 0}});
    // When image is larger than 2GB
    this->conv_params.push_back({3,
                                 1,
                                 2,
                                 128,
                                 128,
                                 {3, 1, 3},
                                 {900, 2, 2048},
                                 {300, 1, 300},
                                 {3, 2, 3},
                                 {1, 1, 1},
                                 {1, 1, 1}});
    this->template Run<3>();
}
test/reduce/CMakeLists.txt
View file @
f6ceef78
add_test_executable
(
test_reduce_no_index reduce_no_index.cpp
)
add_test_executable
(
test_reduce_with_index reduce_with_index.cpp
)
add_
g
test_executable
(
test_reduce_no_index reduce_no_index.cpp
)
add_
g
test_executable
(
test_reduce_with_index reduce_with_index.cpp
)
target_link_libraries
(
test_reduce_no_index PRIVATE utility device_reduce_instance
)
target_link_libraries
(
test_reduce_with_index PRIVATE utility device_reduce_instance
)
test/reduce/reduce_no_index.cpp
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <getopt.h>
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>
using
namespace
ck
;
static
struct
option
long_options
[]
=
{{
"inLengths"
,
required_argument
,
nullptr
,
'D'
},
{
"reduceDimensions"
,
required_argument
,
nullptr
,
'R'
},
{
"scales"
,
required_argument
,
nullptr
,
'S'
},
{
"help"
,
no_argument
,
nullptr
,
'?'
},
{
nullptr
,
0
,
nullptr
,
0
}};
/// One reduction test case; defaults describe a full reduction of a
/// 64x4x280x82 tensor. Fields map one-to-one onto the arguments of
/// ck::profiler::profile_reduce_impl (see Run() below).
struct ReduceParam
{
    bool do_verification{true}; // compare device result against host reference
    bool propagateNan{false};   // NaN propagation mode passed to the profiler
    bool useIndex{false};       // this is the no-index test, so indices are off
    bool time_kernel{false};    // no timing — correctness only
    bool do_dumpout{false};     // no tensor dumping
    int init_method{2};         // 2 = scope integer value (per the usage text)
    float alpha{1.0f};          // output scale
    float beta{0.0f};           // blend factor for prior output contents
    std::vector<size_t> inLengths{64, 4, 280, 82}; // 4-D input tensor lengths
    std::vector<int> reduceDims{0, 1, 2, 3};       // dimensions to reduce
};
class
SimpleAppArgs
std
::
vector
<
std
::
vector
<
int
>>
SetGenericReduceDim
()
{
private:
int
option_index
=
0
;
return
{{
0
,
1
,
2
,
3
},
{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
2
,
3
},
{
1
,
2
,
3
},
{
0
},
{
1
},
{
2
},
{
3
}};
}
public:
std
::
vector
<
size_t
>
inLengths
;
std
::
vector
<
int
>
reduceDims
;
std
::
vector
<
float
>
scales
;
template
<
typename
T
>
class
ReduceWithIndexTest
:
public
::
testing
::
Test
{
protected:
using
InDataType
=
std
::
tuple_element_t
<
0
,
T
>
;
using
AccDataType
=
std
::
tuple_element_t
<
1
,
T
>
;
using
OutDataType
=
std
::
tuple_element_t
<
2
,
T
>
;
int
data_type
;
int
init_method
=
1
;
static
std
::
vector
<
ReduceParam
>
params
;
public:
void
show_usage
(
const
char
*
cmd
)
static
void
SetUpTestSuite
()
{
std
::
cout
<<
"Usage of "
<<
cmd
<<
std
::
endl
;
std
::
cout
<<
"--inLengths or -D, comma separated list of input tensor dimension lengths "
"(only 4-d tensor supported)"
<<
std
::
endl
;
std
::
cout
<<
"--reduceDimensions or -R comma seperated list of dimension indexes to reduce "
"(only 1 or 3 or 4 dimensions supported)"
<<
std
::
endl
;
std
::
cout
<<
"--scales or -S, comma separated two float values for alpha and beta"
<<
std
::
endl
;
std
::
cout
<<
"Arg1 -- data type (0: fp16, 1: fp32, 3: int8, 5: bp16, 6: fp64)"
<<
std
::
endl
;
std
::
cout
<<
"Arg2 -- init method(0=no init, 1=single integer value, 2=scope integer "
"value, 3=decimal value)"
<<
std
::
endl
;
};
int
processArgs
(
int
argc
,
char
*
argv
[])
{
using
ck
::
host_common
::
getTypeValuesFromString
;
int
ch
;
// set testcase variables
ReduceParam
set
;
const
auto
setReduceDim
=
SetGenericReduceDim
();
while
(
1
)
for
(
std
::
size_t
i
(
0
);
i
<
setReduceDim
.
size
();
++
i
)
{
ch
=
getopt_long
(
argc
,
argv
,
"D:R:S:"
,
long_options
,
&
option_index
);
if
(
ch
==
-
1
)
break
;
switch
(
ch
)
{
case
'D'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
inLengths
=
getTypeValuesFromString
<
size_t
>
(
optarg
);
break
;
case
'R'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
reduceDims
=
getTypeValuesFromString
<
int
>
(
optarg
);
break
;
case
'S'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
scales
=
getTypeValuesFromString
<
float
>
(
optarg
);
break
;
case
'?'
:
if
(
std
::
string
(
long_options
[
option_index
].
name
)
==
"help"
)
{
show_usage
(
argv
[
0
]);
return
(
-
1
);
};
break
;
default:
show_usage
(
argv
[
0
]);
return
(
-
1
);
};
};
if
(
optind
+
2
>
argc
)
throw
std
::
runtime_error
(
"Invalid cmd-line arguments, more argumetns are needed!"
);
data_type
=
std
::
atoi
(
argv
[
optind
++
]);
init_method
=
std
::
atoi
(
argv
[
optind
]);
if
(
scales
.
empty
())
set
.
reduceDims
=
setReduceDim
[
i
];
params
.
emplace_back
(
set
);
}
}
template
<
ReduceTensorOp
ReduceOpIdType
>
void
Run
()
{
for
(
auto
param
:
this
->
params
)
{
scales
.
push_back
(
1.0
f
);
scales
.
push_back
(
0.0
f
);
};
bool
success
=
ck
::
profiler
::
profile_reduce_impl
<
InDataType
,
AccDataType
,
OutDataType
>
(
param
.
do_verification
,
param
.
init_method
,
param
.
do_dumpout
,
param
.
time_kernel
,
param
.
inLengths
,
param
.
reduceDims
,
ReduceOpIdType
,
param
.
propagateNan
,
param
.
useIndex
,
param
.
alpha
,
param
.
beta
);
EXPECT_TRUE
(
success
);
}
}
};
if
(
inLengths
.
size
()
!=
4
||
(
reduceDims
.
size
()
!=
1
&&
reduceDims
.
size
()
!=
3
&&
reduceDims
.
size
()
!=
4
))
return
(
-
1
);
template
<
typename
T
>
std
::
vector
<
ReduceParam
>
ReduceWithIndexTest
<
T
>::
params
=
{};
if
(
data_type
!=
0
&&
data_type
!=
1
&&
data_type
!=
3
&&
data_type
!=
5
&&
data_type
!=
6
)
return
(
-
1
);
using
Reduce_float_types
=
::
testing
::
Types
<
std
::
tuple
<
float
,
float
,
float
>>
;
using
Reduce_double_types
=
::
testing
::
Types
<
std
::
tuple
<
double
,
double
,
double
>>
;
using
Reduce_int8t_types
=
::
testing
::
Types
<
std
::
tuple
<
int8_t
,
int8_t
,
int8_t
>>
;
using
Reduce_half_types
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>>
;
using
Reduce_bhalf_float_Types
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
bhalf_t
,
float
,
ck
::
bhalf_t
>>
;
return
(
0
);
};
template
<
typename
TType
>
class
ReduceWithNoIndexFloat
:
public
ReduceWithIndexTest
<
TType
>
{
};
bool
test_reduce_no_index
(
int
data_type
,
int
init_method
,
std
::
vector
<
int
>
reduceDims
,
std
::
vector
<
size_t
>
inLengths
,
ReduceTensorOp
reduceOpId
,
bool
propagateNan
,
float
alpha
,
float
beta
)
template
<
typename
TType
>
class
ReduceWithNoIndexDouble
:
public
ReduceWithIndexTest
<
TType
>
{
using
ck
::
profiler
::
profile_reduce_impl
;
}
;
bool
result
=
true
;
template
<
typename
TType
>
class
ReduceWithNoIndexInt8
:
public
ReduceWithIndexTest
<
TType
>
{
};
if
(
data_type
==
0
)
{
result
=
profile_reduce_impl
<
float
,
float
,
float
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
false
,
alpha
,
beta
);
}
else
if
(
data_type
==
1
)
{
result
=
profile_reduce_impl
<
ck
::
half_t
,
float
,
ck
::
half_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
false
,
alpha
,
beta
);
}
else
if
(
data_type
==
3
)
{
result
=
profile_reduce_impl
<
int8_t
,
int32_t
,
int8_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
false
,
alpha
,
beta
);
}
else
if
(
data_type
==
5
)
{
result
=
profile_reduce_impl
<
ck
::
bhalf_t
,
float
,
ck
::
bhalf_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
false
,
alpha
,
beta
);
}
else
if
(
data_type
==
6
)
{
result
=
profile_reduce_impl
<
double
,
double
,
double
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
false
,
alpha
,
beta
);
}
template
<
typename
TType
>
class
ReduceWithNoIndexHalf
:
public
ReduceWithIndexTest
<
TType
>
{
};
return
(
result
);
template
<
typename
TType
>
class
ReduceWithNoIndexBHalfFloat
:
public
ReduceWithIndexTest
<
TType
>
{
};
constexpr
ReduceTensorOp
reduceOpId
=
ReduceTensorOp
::
AVG
;
constexpr
bool
propagateNan
=
false
;
TYPED_TEST_SUITE
(
ReduceWithNoIndexFloat
,
Reduce_float_types
);
TYPED_TEST_SUITE
(
ReduceWithNoIndexDouble
,
Reduce_double_types
);
TYPED_TEST_SUITE
(
ReduceWithNoIndexInt8
,
Reduce_int8t_types
);
TYPED_TEST_SUITE
(
ReduceWithNoIndexHalf
,
Reduce_half_types
);
TYPED_TEST_SUITE
(
ReduceWithNoIndexBHalfFloat
,
Reduce_bhalf_float_Types
);
int
main
(
int
argc
,
char
*
argv
[]
)
TYPED_TEST
(
ReduceWithNoIndexFloat
,
ReduceWithNoIndexTestFloat_AMAX
)
{
SimpleAppArgs
args
;
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
bool
result
=
true
;
TYPED_TEST
(
ReduceWithNoIndexFloat
,
ReduceWithNoIndexTestFloat_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
if
(
argc
==
1
)
{
int
data_type
=
1
;
int
init_method
=
2
;
std
::
vector
<
size_t
>
inLengths
{
64
,
4
,
280
,
80
};
std
::
vector
<
std
::
vector
<
int
>>
v_reduceDims
{
{
0
,
1
,
2
,
3
},
{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
0
,
1
,
3
},
{
0
,
2
,
3
},
{
0
},
{
1
},
{
2
},
{
3
}};
for
(
auto
&
reduceDims
:
v_reduceDims
)
result
=
result
&&
test_reduce_no_index
(
data_type
,
init_method
,
reduceDims
,
inLengths
,
reduceOpId
,
propagateNan
,
1.0
f
,
0.0
f
);
}
else
{
if
(
args
.
processArgs
(
argc
,
argv
)
<
0
)
{
throw
std
::
runtime_error
(
"Invalid input arguments, test_reduce_no_index could not be executed!"
);
};
result
=
test_reduce_no_index
(
args
.
data_type
,
args
.
init_method
,
args
.
reduceDims
,
args
.
inLengths
,
reduceOpId
,
propagateNan
,
args
.
scales
[
0
],
args
.
scales
[
1
]
);
}
TYPED_TEST
(
ReduceWithNoIndexFloat
,
ReduceWithNoIndexTestFloat_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>()
;
}
TYPED_TEST
(
ReduceWithNoIndexDouble
,
ReduceWithNoIndexTestDouble_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithNoIndexDouble
,
ReduceWithNoIndexTestDouble_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithNoIndexDouble
,
ReduceWithNoIndexTestDouble_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
TYPED_TEST
(
ReduceWithNoIndexInt8
,
ReduceWithNoIndexTestInt8_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithNoIndexInt8
,
ReduceWithNoIndexTestInt8_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>(
);
}
std
::
cout
<<
"test_reduce_no_index ..... "
<<
(
result
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
TYPED_TEST
(
ReduceWithNoIndexInt8
,
ReduceWithNoIndexTestInt8_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
TYPED_TEST
(
ReduceWithNoIndexHalf
,
ReduceWithNoIndexTestHalf_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithNoIndexHalf
,
ReduceWithNoIndexTestHalf_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithNoIndexHalf
,
ReduceWithNoIndexTestHalf_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
// AMAX reduction (no index output) for the bhalf_t-in / float-accumulate /
// bhalf_t-out type tuple.
// Fix: test name had a typo ("TesBtHalfFloat"); renamed to match the sibling
// ReduceWithNoIndexTestBHalfFloat_MIN / _MAX tests.
TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTestBHalfFloat_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}
return
(
result
?
0
:
-
1
);
TYPED_TEST
(
ReduceWithNoIndexBHalfFloat
,
ReduceWithNoIndexTestBHalfFloat_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithNoIndexBHalfFloat
,
ReduceWithNoIndexTestBHalfFloat_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
test/reduce/reduce_with_index.cpp
View file @
f6ceef78
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <getopt.h>
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>
using
namespace
ck
;
static
struct
option
long_options
[]
=
{{
"inLengths"
,
required_argument
,
nullptr
,
'D'
},
{
"reduceDimensions"
,
required_argument
,
nullptr
,
'R'
},
{
"scales"
,
required_argument
,
nullptr
,
'S'
},
{
"help"
,
no_argument
,
nullptr
,
'?'
},
{
nullptr
,
0
,
nullptr
,
0
}};
/// One reduction test case; defaults describe a full reduction of a
/// 64x4x280x82 tensor. Fields map one-to-one onto the arguments of
/// ck::profiler::profile_reduce_impl (see Run() below).
struct ReduceParam
{
    bool do_verification{true}; // compare device result against host reference
    bool propagateNan{false};   // NaN propagation mode passed to the profiler
    bool useIndex{false};       // NOTE(review): false even in the with-index
                                // test — presumably Run() relies on this
                                // default; confirm intended
    bool time_kernel{false};    // no timing — correctness only
    bool do_dumpout{false};     // no tensor dumping
    int init_method{2};         // 2 = scope integer value (per the usage text)
    float alpha{1.0f};          // output scale
    float beta{0.0f};           // blend factor for prior output contents
    std::vector<size_t> inLengths{64, 4, 280, 82}; // 4-D input tensor lengths
    std::vector<int> reduceDims{0, 1, 2, 3};       // dimensions to reduce
};
class
SimpleAppArgs
std
::
vector
<
std
::
vector
<
int
>>
SetGenericReduceDim
()
{
private:
int
option_index
=
0
;
return
{{
0
,
1
,
2
,
3
},
{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
2
,
3
},
{
1
,
2
,
3
},
{
0
},
{
1
},
{
2
},
{
3
}};
}
public:
std
::
vector
<
size_t
>
inLengths
;
std
::
vector
<
int
>
reduceDims
;
std
::
vector
<
float
>
scales
;
template
<
typename
T
>
class
ReduceWithIndexTest
:
public
::
testing
::
Test
{
protected:
using
InDataType
=
std
::
tuple_element_t
<
0
,
T
>
;
using
AccDataType
=
std
::
tuple_element_t
<
1
,
T
>
;
using
OutDataType
=
std
::
tuple_element_t
<
2
,
T
>
;
int
data_type
;
int
init_method
=
1
;
static
std
::
vector
<
ReduceParam
>
params
;
public:
void
show_usage
(
const
char
*
cmd
)
static
void
SetUpTestSuite
()
{
std
::
cout
<<
"Usage of "
<<
cmd
<<
std
::
endl
;
std
::
cout
<<
"--inLengths or -D, comma separated list of input tensor dimension lengths "
"(only 4-d tensor supported)"
<<
std
::
endl
;
std
::
cout
<<
"--reduceDimensions or -R comma seperated list of dimension indexes to reduce "
"(only 1 or 3 or 4 dimensions supported)"
<<
std
::
endl
;
std
::
cout
<<
"--scales or -S, comma separated two float values for alpha and beta"
<<
std
::
endl
;
std
::
cout
<<
"Arg1 -- data type (1: fp32, 3: int8, 5: bp16, 6: fp64)"
<<
std
::
endl
;
std
::
cout
<<
"Arg2 -- init method(0=no init, 1=single integer value, 2=scope integer "
"value, 3=decimal value)"
<<
std
::
endl
;
};
int
processArgs
(
int
argc
,
char
*
argv
[])
{
using
ck
::
host_common
::
getTypeValuesFromString
;
int
ch
;
// set testcase variables
ReduceParam
set
;
const
auto
setReduceDim
=
SetGenericReduceDim
();
while
(
1
)
for
(
std
::
size_t
i
(
0
);
i
<
setReduceDim
.
size
();
++
i
)
{
ch
=
getopt_long
(
argc
,
argv
,
"D:R:S:"
,
long_options
,
&
option_index
);
if
(
ch
==
-
1
)
break
;
switch
(
ch
)
{
case
'D'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
inLengths
=
getTypeValuesFromString
<
size_t
>
(
optarg
);
break
;
case
'R'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
reduceDims
=
getTypeValuesFromString
<
int
>
(
optarg
);
break
;
case
'S'
:
if
(
!
optarg
)
throw
std
::
runtime_error
(
"Invalid option format!"
);
scales
=
getTypeValuesFromString
<
float
>
(
optarg
);
break
;
case
'?'
:
if
(
std
::
string
(
long_options
[
option_index
].
name
)
==
"help"
)
{
show_usage
(
argv
[
0
]);
return
(
-
1
);
};
break
;
default:
show_usage
(
argv
[
0
]);
return
(
-
1
);
};
};
if
(
optind
+
2
>
argc
)
throw
std
::
runtime_error
(
"Invalid cmd-line arguments, more argumetns are needed!"
);
data_type
=
std
::
atoi
(
argv
[
optind
++
]);
init_method
=
std
::
atoi
(
argv
[
optind
]);
if
(
scales
.
empty
())
set
.
reduceDims
=
setReduceDim
[
i
];
params
.
emplace_back
(
set
);
}
}
template
<
ReduceTensorOp
ReduceOpIdType
>
void
Run
()
{
for
(
auto
param
:
this
->
params
)
{
scales
.
push_back
(
1.0
f
);
scales
.
push_back
(
0.0
f
);
};
bool
success
=
ck
::
profiler
::
profile_reduce_impl
<
InDataType
,
AccDataType
,
OutDataType
>
(
param
.
do_verification
,
param
.
init_method
,
param
.
do_dumpout
,
param
.
time_kernel
,
param
.
inLengths
,
param
.
reduceDims
,
ReduceOpIdType
,
param
.
propagateNan
,
param
.
useIndex
,
param
.
alpha
,
param
.
beta
);
EXPECT_TRUE
(
success
);
}
}
};
if
(
inLengths
.
size
()
!=
4
||
(
reduceDims
.
size
()
!=
1
&&
reduceDims
.
size
()
!=
3
&&
reduceDims
.
size
()
!=
4
))
return
(
-
1
);
template
<
typename
T
>
std
::
vector
<
ReduceParam
>
ReduceWithIndexTest
<
T
>::
params
=
{};
if
(
data_type
!=
0
&&
data_type
!=
1
&&
data_type
!=
3
&&
data_type
!=
5
&&
data_type
!=
6
)
return
(
-
1
);
using
Reduce_float_types
=
::
testing
::
Types
<
std
::
tuple
<
float
,
float
,
float
>>
;
using
Reduce_double_types
=
::
testing
::
Types
<
std
::
tuple
<
double
,
double
,
double
>>
;
using
Reduce_int8t_types
=
::
testing
::
Types
<
std
::
tuple
<
int8_t
,
int8_t
,
int8_t
>>
;
using
Reduce_half_types
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>>
;
using
Reduce_bhalf_float_Types
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
bhalf_t
,
float
,
ck
::
bhalf_t
>>
;
return
(
0
);
};
template
<
typename
TType
>
class
ReduceWithIndexFloat
:
public
ReduceWithIndexTest
<
TType
>
{
};
bool
test_reduce_with_index
(
int
data_type
,
int
init_method
,
std
::
vector
<
int
>
reduceDims
,
std
::
vector
<
size_t
>
inLengths
,
ReduceTensorOp
reduceOpId
,
bool
propagateNan
,
float
alpha
,
float
beta
)
template
<
typename
TType
>
class
ReduceWithIndexDouble
:
public
ReduceWithIndexTest
<
TType
>
{
using
ck
::
profiler
::
profile_reduce_impl
;
}
;
bool
result
=
true
;
template
<
typename
TType
>
class
ReduceWithIndexInt8
:
public
ReduceWithIndexTest
<
TType
>
{
};
if
(
data_type
==
0
)
{
result
=
profile_reduce_impl
<
float
,
float
,
float
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
true
,
alpha
,
beta
);
}
else
if
(
data_type
==
1
)
{
result
=
profile_reduce_impl
<
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
true
,
alpha
,
beta
);
}
else
if
(
data_type
==
3
)
{
result
=
profile_reduce_impl
<
int8_t
,
int8_t
,
int8_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
true
,
alpha
,
beta
);
}
else
if
(
data_type
==
5
)
{
result
=
profile_reduce_impl
<
ck
::
bhalf_t
,
float
,
ck
::
bhalf_t
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
true
,
alpha
,
beta
);
}
else
if
(
data_type
==
6
)
{
result
=
profile_reduce_impl
<
double
,
double
,
double
>
(
true
,
init_method
,
false
,
false
,
inLengths
,
reduceDims
,
reduceOpId
,
propagateNan
,
true
,
alpha
,
beta
);
}
template
<
typename
TType
>
class
ReduceWithIndexHalf
:
public
ReduceWithIndexTest
<
TType
>
{
};
return
(
result
);
// Typed-test fixture for bhalf-in / float-accumulate reductions; all test logic
// lives in ReduceWithIndexTest.
template <typename TType>
class ReduceWithIndexBHalfFloat : public ReduceWithIndexTest<TType>
{
};
// Defaults used by the command-line driver in main(): always profile the AMAX
// (absolute-max with index) operation without NaN propagation.
constexpr ReduceTensorOp reduceOpId = ReduceTensorOp::AMAX;
constexpr bool propagateNan         = false;
// Bind each fixture to its matching element-type tuple list.
TYPED_TEST_SUITE(ReduceWithIndexFloat, Reduce_float_types);
TYPED_TEST_SUITE(ReduceWithIndexDouble, Reduce_double_types);
TYPED_TEST_SUITE(ReduceWithIndexInt8, Reduce_int8t_types);
TYPED_TEST_SUITE(ReduceWithIndexHalf, Reduce_half_types);
TYPED_TEST_SUITE(ReduceWithIndexBHalfFloat, Reduce_bhalf_float_Types);
int
main
(
int
argc
,
char
*
argv
[]
)
TYPED_TEST
(
ReduceWithIndexFloat
,
ReduceWithIndexTestFloat_AMAX
)
{
SimpleAppArgs
args
;
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
bool
result
=
true
;
TYPED_TEST
(
ReduceWithIndexFloat
,
ReduceWithIndexTestFloat_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
if
(
argc
==
1
)
{
int
data_type
=
1
;
int
init_method
=
2
;
std
::
vector
<
size_t
>
inLengths
{
64
,
4
,
280
,
80
};
std
::
vector
<
std
::
vector
<
int
>>
v_reduceDims
{
{
0
,
1
,
2
,
3
},
{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
0
,
1
,
3
},
{
0
,
2
,
3
},
{
0
},
{
1
},
{
2
},
{
3
}};
for
(
auto
&
reduceDims
:
v_reduceDims
)
result
=
result
&&
test_reduce_with_index
(
data_type
,
init_method
,
reduceDims
,
inLengths
,
reduceOpId
,
propagateNan
,
1.0
f
,
0.0
f
);
}
else
{
if
(
args
.
processArgs
(
argc
,
argv
)
<
0
)
{
throw
std
::
runtime_error
(
"Invalid input arguments, test_reduce_with_index could not be executed!"
);
};
result
=
test_reduce_with_index
(
args
.
data_type
,
args
.
init_method
,
args
.
reduceDims
,
args
.
inLengths
,
reduceOpId
,
propagateNan
,
args
.
scales
[
0
],
args
.
scales
[
1
]
);
}
TYPED_TEST
(
ReduceWithIndexFloat
,
ReduceWithIndexTestFloat_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>()
;
}
TYPED_TEST
(
ReduceWithIndexDouble
,
ReduceWithIndexTestDouble_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithIndexDouble
,
ReduceWithIndexTestDouble_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithIndexDouble
,
ReduceWithIndexTestDouble_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
TYPED_TEST
(
ReduceWithIndexInt8
,
ReduceWithIndexTestInt8_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithIndexInt8
,
ReduceWithIndexTestInt8_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>(
);
}
std
::
cout
<<
"test_reduce_with_index ..... "
<<
(
result
?
"SUCCESS"
:
"FAILURE"
)
<<
std
::
endl
;
TYPED_TEST
(
ReduceWithIndexInt8
,
ReduceWithIndexTestInt8_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
TYPED_TEST
(
ReduceWithIndexHalf
,
ReduceWithIndexTestHalf_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
TYPED_TEST
(
ReduceWithIndexHalf
,
ReduceWithIndexTestHalf_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithIndexHalf
,
ReduceWithIndexTestHalf_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
TYPED_TEST
(
ReduceWithIndexBHalfFloat
,
ReduceWithIndexTesBtHalfFloat_AMAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
AMAX
>();
}
return
(
result
?
0
:
-
1
);
TYPED_TEST
(
ReduceWithIndexBHalfFloat
,
ReduceWithIndexTestBHalfFloat_MIN
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MIN
>();
}
TYPED_TEST
(
ReduceWithIndexBHalfFloat
,
ReduceWithIndexTestBHalfFloat_MAX
)
{
// trigger Run() -> Generic
this
->
template
Run
<
ReduceTensorOp
::
MAX
>();
}
test/smfmac_op/smfmac_op_xdl.cpp
View file @
f6ceef78
...
...
@@ -13,6 +13,7 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "test/smfmac_op/smfmac_op_util.hpp"
#include "ck/host_utility/device_prop.hpp"
// Shorthand aliases for the CK 16-bit element types used throughout this test.
using BF16 = ck::bhalf_t;
using F16  = ck::half_t;
...
...
@@ -38,40 +39,43 @@ class TestSmfmac : public ::testing::Test
void
Run
()
{
bool
pass
=
true
;
constexpr
auto
matmul_default
=
ck
::
smfmac_op_util
::
matmul
<
Src1Type
,
Src1VecSize
,
Src2Type
,
Src2VecSize
,
GPUAccType
,
AccVecSize
,
DstType
,
M
,
N
,
K
>
;
bool
pass
=
true
;
if
(
ck
::
get_device_name
()
==
"gfx942"
)
{
constexpr
auto
matmul_default
=
ck
::
smfmac_op_util
::
matmul
<
Src1Type
,
Src1VecSize
,
Src2Type
,
Src2VecSize
,
GPUAccType
,
AccVecSize
,
DstType
,
M
,
N
,
K
>
;
constexpr
auto
smfmac_kernel_container
=
std
::
make_tuple
(
matmul_default
);
ck
::
static_for
<
0
,
std
::
tuple_size_v
<
decltype
(
smfmac_kernel_container
)
>
,
1
>
{}([
&
](
auto
i
)
{
pass
&=
ck
::
smfmac_op_util
::
TestSmfmac
<
std
::
tuple_element_t
<
i
.
value
,
decltype
(
smfmac_kernel_container
)
>
,
Src1Type
,
Src2Type
,
DstType
,
GPUAccType
,
CPUAccType
,
decltype
(
Row
{}),
decltype
(
Row
{}),
decltype
(
Row
{}),
PassThrough
,
PassThrough
,
PassThrough
,
AccVecSize
,
M
,
N
,
K
>
{}(
std
::
get
<
ck
::
Number
<
i
>
{}
>
(
smfmac_kernel_container
));
});
constexpr
auto
smfmac_kernel_container
=
std
::
make_tuple
(
matmul_default
);
ck
::
static_for
<
0
,
std
::
tuple_size_v
<
decltype
(
smfmac_kernel_container
)
>
,
1
>
{}(
[
&
](
auto
i
)
{
pass
&=
ck
::
smfmac_op_util
::
TestSmfmac
<
std
::
tuple_element_t
<
i
.
value
,
decltype
(
smfmac_kernel_container
)
>
,
Src1Type
,
Src2Type
,
DstType
,
GPUAccType
,
CPUAccType
,
decltype
(
Row
{}),
decltype
(
Row
{}),
decltype
(
Row
{}),
PassThrough
,
PassThrough
,
PassThrough
,
AccVecSize
,
M
,
N
,
K
>
{}(
std
::
get
<
ck
::
Number
<
i
>
{}
>
(
smfmac_kernel_container
));
});
}
EXPECT_TRUE
(
pass
);
}
};
...
...
Prev
1
…
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment