"vscode:/vscode.git/clone" did not exist on "3126507ab99138d7ea8568381169f6244e64fedf"
Commit f6ceef78 authored by ThomasNing's avatar ThomasNing
Browse files

merge with the develop branch

parents 536c5458 25935b57
@@ -15,9 +15,27 @@ python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N4.log
python3 process_perf_data.py perf_batched_gemm.log
python3 process_perf_data.py perf_grouped_gemm.log
python3 process_perf_data.py perf_grouped_conv_fwd.log
python3 process_perf_data.py perf_grouped_conv_bwd_data.log
python3 process_perf_data.py perf_grouped_conv_bwd_weight.log
python3 process_perf_data.py perf_gemm_bilinear.log
python3 process_perf_data.py perf_reduction.log
python3 process_perf_data.py perf_splitK_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
file=./perf_fmha_fwd_gfx942.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
fi
file=./perf_fmha_bwd_gfx942.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_bwd_gfx942.log
fi
file=./perf_fmha_fwd_gfx90a.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_fwd_gfx90a.log
fi
file=./perf_fmha_bwd_gfx90a.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_bwd_gfx90a.log
fi
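The four fmha blocks repeat the same existence check with only the log name changing; a minimal loop-based equivalent (an editorial sketch, not part of the commit) would be:

for f in perf_fmha_fwd_gfx942.log perf_fmha_bwd_gfx942.log perf_fmha_fwd_gfx90a.log perf_fmha_bwd_gfx90a.log; do
    # hypothetical consolidation; same behavior as the four blocks above
    if [ -e "./$f" ]; then
        python3 process_perf_data.py "$f"
    fi
done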
@@ -12,27 +12,28 @@ INIT=$5
LOG=$6
TIME=$7
N=$8
SplitK=$9
# Resnet50
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $SplitK
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 $SplitK
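With the new SplitK=$9 parameter, every Resnet50 shape now forwards a split-K factor. For reference, the nine positional arguments map as in the CI invocations later in this commit, e.g.

./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 4

reads OP=grouped_conv_bwd_weight, DATATYPE=1, LAYOUT=2, VERIFY=$verify, INIT=1, LOG=0, TIME=1, N=256, SplitK=4.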
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
LAYOUT=$3
INDEXTYPE=$4
VERIFY=$5
INIT=$6
LOG=$7
TIME=$8
N=$9
# Resnet50
######## op datatype indextype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $INDEXTYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
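This variant inserts INDEXTYPE as $4 before LAYOUT, so an invocation such as

./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0 $verify 1 0 1 256

(as used in the CI script below) reads OP=grouped_conv_fwd, DATATYPE=0, LAYOUT=1, INDEXTYPE=0, VERIFY=$verify, INIT=1, LOG=0, TIME=1, N=256.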
@@ -90,21 +90,27 @@ print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 2>&1 | tee -a $gemm_bilinear_log

#run grouped_fwd tests
export grouped_conv_fwd_log="perf_grouped_conv_fwd.log"
print_log_header $grouped_conv_fwd_log $env_type $branch $host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 0 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
./profile_grouped_conv_fwd.sh grouped_conv_fwd 2 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log

#run grouped_bwd_data tests
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data.log"
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
#run grouped_bwd_weight tests
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight.log"
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 0 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 2 2 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 2 $verify 1 0 1 256 4 2>&1 | tee -a $grouped_conv_bwd_weight_log
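Note that the trailing 1 (and 4 in the last run) feeds the new SplitK=$9 parameter introduced above, so bwd-weight performance is collected for split-K factors 1 and 4.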
#run resnet50 tests
export resnet256_log="perf_resnet50_N256.log"
...
@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
#run grouped_fwd fp16 tests
export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log"
print_log_header $grouped_conv_fwd_log $env_type $branch $host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
#run grouped_bwd_data fp16 tests
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log"
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
#run grouped_bwd_weight fp16 tests
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log"
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
#run resnet50 tests
export resnet256_log="perf_resnet50_N256.log"
print_log_header $resnet256_log $env_type $branch $host_name
...
#!/bin/bash
## The following will be used for CI
set -x
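## usage (inferred from the sections below): test_reduce_with_index -D <input lengths> -R <dims to reduce> <data type> <init method>
## data type codes per the section labels: 0=float, 1=float16, 3=int8_t, 5=bfloat16, 6=float64; init method 2 = scoped integer init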
## for float
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 0 2
## for float64
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 6 2
## for float16
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 1 2
## for int8_t
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 3 2
## for bfloat16
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 5 2
set +x
@@ -68,11 +68,11 @@ function(add_test_executable TEST_NAME)
    #only continue if there are some source files left on the list
    if(ARGN)
        if(ARGN MATCHES "_xdl")
            list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
        elseif(ARGN MATCHES "_wmma")
            list(REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
        elseif(ARGN MATCHES "_smfmac")
            list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
        endif()
        set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
        add_executable(${TEST_NAME} ${ARGN})
@@ -149,11 +149,11 @@ function(add_gtest_executable TEST_NAME)
    #only continue if there are some source files left on the list
    if(ARGN)
        if(ARGN MATCHES "_xdl")
            list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
        elseif(ARGN MATCHES "_wmma")
            list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
        elseif(ARGN MATCHES "_smfmac")
            list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
        endif()
        set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
        add_executable(${TEST_NAME} ${ARGN})
...
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <string>
@@ -24,12 +24,12 @@ class TestConvUtil : public ::testing::Test
                       128,
                       192,
                       256,
                       std::vector<ck::long_index_t>(ndims, 3),
                       std::vector<ck::long_index_t>(ndims, 71),
                       std::vector<ck::long_index_t>(ndims, s),
                       std::vector<ck::long_index_t>(ndims, d),
                       std::vector<ck::long_index_t>(ndims, p),
                       std::vector<ck::long_index_t>(ndims, p));
    }

    protected:
@@ -48,35 +48,35 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
{
    // stride 2, dilation 1, pad 1
    SetNDParams(1, 2, 1, 1);
    std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::long_index_t>{36}, "Error: ConvParams 1D."));

    // stride 1, dilation 1, pad 1
    SetNDParams(1, 1, 1, 1);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::long_index_t>{71}, "Error: ConvParams 1D stride {1}."));

    // stride 2, dilation 1, pad 2
    SetNDParams(1, 2, 1, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{37},
                                     "Error: ConvParams 1D padding left/right {2}."));

    // stride 2, dilation 2, pad 2
    SetNDParams(1, 2, 2, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::long_index_t>{36}, "Error: ConvParams 1D dilation {2}."));

    // stride 3, dilation 2, pad 1
    SetNDParams(1, 3, 2, 1);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(
        ck::utils::check_err(out_spatial_len,
                             std::vector<ck::long_index_t>{23},
                             "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
}
@@ -84,36 +84,38 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{
    // stride 2, dilation 1, pad 1
    SetNDParams(2, 2, 1, 1);
    std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{36, 36},
                                     "Error: ConvParams 2D default constructor."));

    // stride 1, dilation 1, pad 1
    SetNDParams(2, 1, 1, 1);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{71, 71},
                                     "Error: ConvParams 2D stride {1,1}."));

    // stride 2, dilation 1, pad 2
    SetNDParams(2, 2, 1, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{37, 37},
                                     "Error: ConvParams 2D padding left/right {2,2}."));

    // stride 2, dilation 2, pad 2
    SetNDParams(2, 2, 2, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{36, 36},
                                     "Error: ConvParams 2D dilation {2,2}."));

    // stride 3, dilation 2, pad 1
    SetNDParams(2, 3, 2, 1);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(
        ck::utils::check_err(out_spatial_len,
                             std::vector<ck::long_index_t>{23, 23},
                             "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
}
@@ -121,29 +123,29 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
{
    // stride 2, dilation 1, pad 1
    SetNDParams(3, 2, 1, 1);
    std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len, std::vector<ck::long_index_t>{36, 36, 36}, "Error: ConvParams 3D."));

    // stride 1, dilation 1, pad 1
    SetNDParams(3, 1, 1, 1);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{71, 71, 71},
                                     "Error: ConvParams 3D stride {1, 1, 1}."));

    // stride 2, dilation 1, pad 2
    SetNDParams(3, 2, 1, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{37, 37, 37},
                                     "Error: ConvParams 3D padding left/right {2, 2, 2}."));

    // stride 2, dilation 2, pad 2
    SetNDParams(3, 2, 2, 2);
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
                                     std::vector<ck::long_index_t>{36, 36, 36},
                                     "Error: ConvParams 3D dilation {2, 2, 2}."));

    // stride 3, dilation 2, pad 1
@@ -151,6 +153,6 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
    out_spatial_len = conv_params.GetOutputSpatialLengths();
    EXPECT_TRUE(ck::utils::check_err(
        out_spatial_len,
        std::vector<ck::long_index_t>{23, 23, 23},
        "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
}
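For reference, every expected length above follows the standard convolution output-size formula (stated here for clarity; the tests themselves only pin the numbers):

L_out = floor((L_in + pad_left + pad_right - dilation * (K - 1) - 1) / stride) + 1

e.g. for the 1D case with input 71, filter 3, stride 2, dilation 1, pads 1: floor((71 + 2 - 2 - 1) / 2) + 1 = 36.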
if (GPU_TARGETS)
if (GPU_TARGETS MATCHES "gfx10" OR GPU_TARGETS MATCHES "gfx11" OR GPU_TARGETS MATCHES "gfx12")
add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
set(CK_SKIP_FLAKY_F8_TEST "ON")
endif()
else()
add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
set(CK_SKIP_FLAKY_F8_TEST "ON")
endif()
if (USE_BITINT_EXTENSION_INT4)
    add_gtest_executable(test_int4 test_int4.cpp)
    if(result EQUAL 0)
...
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"

#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"

using ck::bf8_t;
using ck::f8_convert_rne;
using ck::f8_convert_sr;
using ck::half_t;
using ck::type_convert;
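// Note: f8_convert_rne rounds to nearest-even, f8_convert_sr rounds stochastically;
// switching the round-trip tests below to f8_convert_rne pins the deterministic rounding path.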
@@ -24,33 +24,36 @@ TEST(BF8, ConvertFP32Nearest)
    // fix the tolerance value
    float abs_tol = 1e-6;
    // convert 0 float to bf8 and back, check if holds
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<bf8_t>(0.0f)), abs_tol);
// skip the next check where CK_SKIP_FLAKY_F8_TEST is defined (gfx10/11/12 or unspecified targets)
#ifndef CK_SKIP_FLAKY_F8_TEST
    // convert minimal float to bf8 and back, check if holds
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::min())),
                abs_tol);
#endif
    // convert maximal bf8_t to float and check if equal to 57344.0
    ASSERT_NEAR(57344.0f, type_convert<float>(f8_convert_rne<bf8_t>(57344.0f)), abs_tol);
    // convert maximal float to bf8 and back, check if clipped to 57344.0
    ASSERT_NEAR(57344.0f,
                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::max())),
                abs_tol);
    // convert inf float to bf8_t and check if it is qNan
    ASSERT_NEAR(type_convert<bf8_t>(0x80),
                f8_convert_rne<bf8_t>(std::numeric_limits<float>::infinity()),
                abs_tol);
    // positive norm float value to bf8 and back, check if holds
    float pos_float = 0.0000762939f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
    // negative norm float value to bf8 and back, check if holds
    float neg_float = -0.0000610351f;
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
    // positive subnorm float value to bf8 and back, check if holds
    pos_float = 0.0000305175f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
    // negative subnorm float value to bf8 and back, check if holds
    neg_float = -0.0000152587f;
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
}
TEST(BF8, ConvertFP32Stochastic)
@@ -92,34 +96,34 @@ TEST(BF8, ConvertFP16Nearest)
    // fix the tolerance value
    float abs_tol = 1e-3;
    // convert 0 fp16 to bf8 and back, check if holds
    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{0.0})), abs_tol);
    // convert minimal fp16 to bf8 and back, check if holds
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Min())),
                abs_tol);
    // convert maximal bf8_t to fp16 and check if equal to 57344.0
    ASSERT_NEAR(
        half_t{57344.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{57344.0})), abs_tol);
    // convert maximal fp16 to bf8 and back, check if clipped to 57344.0
    ASSERT_NEAR(half_t{57344.0},
                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Max())),
                abs_tol);
    // convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
    ASSERT_NEAR(type_convert<bf8_t>(0x80),
                f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                abs_tol);
    // positive norm fp16 value to bf8 and back, check if holds
    half_t pos_half = half_t{0.0000762939};
    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
    // negative norm fp16 value to bf8 and back, check if holds
    half_t neg_half = half_t{-0.0000610351};
    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
    // positive subnorm fp16 value to bf8 and back, check if holds
    pos_half = half_t{0.0000305175};
    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
    // negative subnorm fp16 value to bf8 and back, check if holds
    neg_half = half_t{-0.0000152587};
    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
}

TEST(BF8, ConvertFP16Stochastic)
...
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "gtest/gtest.h"

#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"

using ck::f8_convert_rne;
using ck::f8_convert_sr;
using ck::f8_t;
using ck::half_t;
@@ -24,33 +24,36 @@ TEST(FP8, ConvertFP32Nearest)
    // fix the tolerance value
    float abs_tol = 1e-6;
    // convert 0 float to fp8 and back, check if holds
    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_t>(0.0f)), abs_tol);
// skip the next check where CK_SKIP_FLAKY_F8_TEST is defined (gfx10/11/12 or unspecified targets)
#ifndef CK_SKIP_FLAKY_F8_TEST
    // convert minimal float to fp8 and back, check if holds
    ASSERT_NEAR(std::numeric_limits<float>::min(),
                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::min())),
                abs_tol);
#endif
    // convert maximal f8_t to float and check if equal to 240.0
    ASSERT_NEAR(240.0f, type_convert<float>(f8_convert_rne<f8_t>(240.0f)), abs_tol);
    // convert maximal float to fp8 and back, check if clipped to 240.0
    ASSERT_NEAR(240.0f,
                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::max())),
                abs_tol);
    // convert inf float to f8_t and check if it is qNan
    ASSERT_NEAR(type_convert<f8_t>(0x80),
                f8_convert_rne<f8_t>(std::numeric_limits<float>::infinity()),
                abs_tol);
    // positive norm float value to fp8 and back, check if holds
    float pos_float = 0.017578125f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
    // negative norm float value to fp8 and back, check if holds
    float neg_float = -0.015625f;
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
    // positive subnorm float value to fp8 and back, check if holds
    pos_float = 0.00390625f;
    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
    // negative subnorm float value to fp8 and back, check if holds
    neg_float = -0.001953125f;
    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
}
TEST(FP8, ConvertFP32Stochastic)
@@ -92,33 +96,33 @@ TEST(FP8, ConvertFP16Nearest)
    // fix the tolerance value
    float abs_tol = 1e-3;
    // convert 0 fp16 to fp8 and back, check if holds
    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{0.0})), abs_tol);
    // convert minimal fp16 to fp8 and back, check if holds
    ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Min())),
                abs_tol);
    // convert maximal f8_t to fp16 and check if equal to 240.0
    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{240.0})), abs_tol);
    // convert maximal fp16 to fp8 and back, check if clipped to 240.0
    ASSERT_NEAR(half_t{240.0},
                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Max())),
                abs_tol);
    // convert QuietNaN fp16 to f8_t and check if it is QuietNaN
    ASSERT_NEAR(type_convert<f8_t>(0x80),
                f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                abs_tol);
    // positive norm fp16 value to fp8 and back, check if holds
    half_t pos_half = half_t{0.017578125};
    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
    // negative norm fp16 value to fp8 and back, check if holds
    half_t neg_half = half_t{-0.015625};
    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
    // positive subnorm fp16 value to fp8 and back, check if holds
    pos_half = half_t{0.00390625};
    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
    // negative subnorm fp16 value to fp8 and back, check if holds
    neg_half = half_t{-0.001953125};
    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
}

TEST(FP8, ConvertFP16Stochastic)
...
@@ -44,17 +44,22 @@ class TestGemmUniversal_MK_NK
using KernelTypes_MK_KN = ::testing::Types<
    // ADataType, BDataType, ComputeDataType, CDataType
    std::tuple< F16, F16, F16, F16>,
#if (defined CK_ENABLE_FP8)
    std::tuple< F16, F8, F16, F16>,
    std::tuple< F8, F16, F16, F16>,
    std::tuple< F8, F8, F8, BF16>,
#endif
    std::tuple< BF16, BF16, BF16, BF16>
    >;

using KernelTypes_MK_NK = ::testing::Types<
    // ADataType, BDataType, ComputeDataType, CDataType
    std::tuple< F16, F16, F16, F16>,
#if (defined CK_ENABLE_FP8)
    std::tuple< F16, F8, F16, F16>,
    std::tuple< F8, F16, F16, F16>,
    std::tuple< F8, F8, F8, BF16>,
#endif
    std::tuple< BF16, BF16, BF16, BF16>
    >;
// clang-format on
...
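Guarding the F8 tuples with CK_ENABLE_FP8 keeps both typed suites compiling when the library is built without FP8 instances; moving the BF16 tuple to the end means the list terminates without a trailing comma whether or not the guard is active.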
@@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11")
    endif()
endif()
if(GPU_TARGETS MATCHES "gfx9")
add_executable(test_grouped_convnd_fwd_large_cases_xdl test_grouped_convnd_fwd_large_cases_xdl.cpp)
target_compile_options(test_grouped_convnd_fwd_large_cases_xdl PRIVATE -Wno-global-constructors -Wno-undef)
target_link_libraries(test_grouped_convnd_fwd_large_cases_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
endif()
add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp)
if(result EQUAL 0)
    target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility)
...
@@ -17,6 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test
    using InLayout  = std::tuple_element_t<1, Tuple>;
    using WeiLayout = std::tuple_element_t<2, Tuple>;
    using OutLayout = std::tuple_element_t<3, Tuple>;
    using IndexType = ck::index_t;

    std::vector<ck::utils::conv::ConvParam> conv_params;
@@ -33,7 +34,10 @@ class TestGroupedConvndFwd : public ::testing::Test
                             OutLayout,
                             DataType,
                             DataType,
                             DataType,
                             DataType,
                             DataType,
                             IndexType>(
            true,  // do_verification
            1,     // init_method: integer value
            false, // do_log
@@ -69,8 +73,6 @@ using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>,
                                       std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>,
                                       std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>;

template <typename Tuple>
class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple>
{
@@ -86,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};

TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d);
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);

TYPED_TEST(TestGroupedConvndFwd1d, Test1D)
{
@@ -144,14 +140,3 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
        {3, 96, 1, 1, 1, {3, 3, 3}, {4, 30, 160}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->template Run<3>();
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
protected:
using DataType = std::tuple_element_t<0, Tuple>;
using InLayout = std::tuple_element_t<1, Tuple>;
using WeiLayout = std::tuple_element_t<2, Tuple>;
using OutLayout = std::tuple_element_t<3, Tuple>;
using IndexType = ck::long_index_t;
std::vector<ck::utils::conv::ConvParam> conv_params;
template <ck::index_t NDimSpatial>
void Run()
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
{
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
}
EXPECT_TRUE(pass);
}
};
using namespace ck::tensor_layout::convolution;
using KernelTypes2d = ::testing::Types<std::tuple<float, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>>;
template <typename Tuple>
class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
{
};
template <typename Tuple>
class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
{
// Case larger than 2GB
this->conv_params.push_back(
{2, 1, 128, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}});
// With supported NumGroupsToMerge > 1
this->conv_params.push_back(
{2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}});
// When image is larger than 2GB
this->conv_params.push_back(
{2, 2, 2, 128, 128, {3, 3}, {4096, 2048}, {300, 300}, {3, 3}, {1, 1}, {1, 1}});
this->template Run<2>();
}
TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
// Case larger than 2GB
this->conv_params.push_back({3,
1,
128,
4,
192,
{2, 2, 2},
{2, 224, 224},
{1, 224, 224},
{1, 1, 1},
{0, 0, 0},
{0, 0, 0}});
// With supported NumGroupsToMerge > 1
this->conv_params.push_back({3,
32,
64,
1,
1,
{2, 2, 2},
{360, 2, 672},
{360, 2, 672},
{1, 1, 1},
{0, 0, 0},
{0, 0, 0}});
// When image is larger than 2GB
this->conv_params.push_back({3,
1,
2,
128,
128,
{3, 1, 3},
{900, 2, 2048},
{300, 1, 300},
{3, 2, 3},
{1, 1, 1},
{1, 1, 1}});
this->template Run<3>();
}
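A quick check on the "larger than 2GB" labels: the third 2D case, for instance, holds 2 * 2 * 128 * 4096 * 2048 = 2^32 input elements, which no longer fits in a 32-bit ck::index_t; that is why this dedicated suite sets IndexType = ck::long_index_t while the regular suite above keeps ck::index_t.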
add_gtest_executable(test_reduce_no_index reduce_no_index.cpp)
add_gtest_executable(test_reduce_with_index reduce_with_index.cpp)
target_link_libraries(test_reduce_no_index PRIVATE utility device_reduce_instance)
target_link_libraries(test_reduce_with_index PRIVATE utility device_reduce_instance)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>
using namespace ck;
struct ReduceParam
{
    bool do_verification{true};
    bool propagateNan{false};
    bool useIndex{false};
    bool time_kernel{false};
    bool do_dumpout{false};
    int init_method{2};
    float alpha{1.0f};
    float beta{0.0f};
    std::vector<size_t> inLengths{64, 4, 280, 82};
    std::vector<int> reduceDims{0, 1, 2, 3};
};

std::vector<std::vector<int>> SetGenericReduceDim()
{
    return {{0, 1, 2, 3}, {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3}, {0}, {1}, {2}, {3}};
}

template <typename T>
class ReduceWithIndexTest : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, T>;
    using AccDataType = std::tuple_element_t<1, T>;
    using OutDataType = std::tuple_element_t<2, T>;

    static std::vector<ReduceParam> params;

    static void SetUpTestSuite()
    {
        // set testcase variables
        ReduceParam set;
        const auto setReduceDim = SetGenericReduceDim();

        for(std::size_t i(0); i < setReduceDim.size(); ++i)
        {
            set.reduceDims = setReduceDim[i];
            params.emplace_back(set);
        }
    }

    template <ReduceTensorOp ReduceOpIdType>
    void Run()
    {
        for(auto param : this->params)
        {
            bool success = ck::profiler::profile_reduce_impl<InDataType, AccDataType, OutDataType>(
                param.do_verification,
                param.init_method,
                param.do_dumpout,
                param.time_kernel,
                param.inLengths,
                param.reduceDims,
                ReduceOpIdType,
                param.propagateNan,
                param.useIndex,
                param.alpha,
                param.beta);
            EXPECT_TRUE(success);
        }
    }
};

template <typename T>
std::vector<ReduceParam> ReduceWithIndexTest<T>::params = {};

using Reduce_float_types       = ::testing::Types<std::tuple<float, float, float>>;
using Reduce_double_types      = ::testing::Types<std::tuple<double, double, double>>;
using Reduce_int8t_types       = ::testing::Types<std::tuple<int8_t, int8_t, int8_t>>;
using Reduce_half_types        = ::testing::Types<std::tuple<ck::half_t, ck::half_t, ck::half_t>>;
using Reduce_bhalf_float_Types = ::testing::Types<std::tuple<ck::bhalf_t, float, ck::bhalf_t>>;

template <typename TType>
class ReduceWithNoIndexFloat : public ReduceWithIndexTest<TType>
{
};

template <typename TType>
class ReduceWithNoIndexDouble : public ReduceWithIndexTest<TType>
{
};

template <typename TType>
class ReduceWithNoIndexInt8 : public ReduceWithIndexTest<TType>
{
};

template <typename TType>
class ReduceWithNoIndexHalf : public ReduceWithIndexTest<TType>
{
};

template <typename TType>
class ReduceWithNoIndexBHalfFloat : public ReduceWithIndexTest<TType>
{
};

TYPED_TEST_SUITE(ReduceWithNoIndexFloat, Reduce_float_types);
TYPED_TEST_SUITE(ReduceWithNoIndexDouble, Reduce_double_types);
TYPED_TEST_SUITE(ReduceWithNoIndexInt8, Reduce_int8t_types);
TYPED_TEST_SUITE(ReduceWithNoIndexHalf, Reduce_half_types);
TYPED_TEST_SUITE(ReduceWithNoIndexBHalfFloat, Reduce_bhalf_float_Types);

TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_MIN)
propagateNan, {
args.scales[0], // trigger Run() -> Generic
args.scales[1]); this->template Run<ReduceTensorOp::MIN>();
} }
std::cout << "test_reduce_no_index ..... " << (result ? "SUCCESS" : "FAILURE") << std::endl; TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTesBtHalfFloat_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
return (result ? 0 : -1); TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTestBHalfFloat_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTestBHalfFloat_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
} }
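With the hand-rolled main() gone, test selection moves to GoogleTest's standard mechanisms. A minimal sketch, assuming the test target supplies its own main() (the filter string is illustrative; any suite or test name from the TYPED_TEST expansion above works, and the same effect is available via --gtest_filter on the command line):

#include <gtest/gtest.h>

// Restrict this binary to the float no-index suites. Setting the flag after
// InitGoogleTest() overrides whatever was passed on the command line.
int main(int argc, char* argv[])
{
    ::testing::InitGoogleTest(&argc, argv);
    ::testing::GTEST_FLAG(filter) = "ReduceWithNoIndexFloat*";
    return RUN_ALL_TESTS();
}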
// ---- removed: standalone test_reduce_with_index driver ----

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#include <getopt.h>

#include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp"

using namespace ck;

static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
                                       {"reduceDimensions", required_argument, nullptr, 'R'},
                                       {"scales", required_argument, nullptr, 'S'},
                                       {"help", no_argument, nullptr, '?'},
                                       {nullptr, 0, nullptr, 0}};

class SimpleAppArgs
{
    private:
    int option_index = 0;

    public:
    std::vector<size_t> inLengths;
    std::vector<int> reduceDims;
    std::vector<float> scales;

    int data_type;
    int init_method = 1;

    // show_usage() and processArgs() are identical to the no-index driver
    // above, except that the usage text lists
    // "Arg1 -- data type (1: fp32, 3: int8, 5: bp16, 6: fp64)".
};

bool test_reduce_with_index(int data_type,
                            int init_method,
                            std::vector<int> reduceDims,
                            std::vector<size_t> inLengths,
                            ReduceTensorOp reduceOpId,
                            bool propagateNan,
                            float alpha,
                            float beta)
{
    using ck::profiler::profile_reduce_impl;

    bool result = true;

    // Same branch structure as test_reduce_no_index, but with useIndex = true
    // and per-type accumulator types:
    if(data_type == 0)
    {
        result = profile_reduce_impl<float, float, float>(
            true, init_method, false, false, inLengths, reduceDims, reduceOpId, propagateNan,
            true, alpha, beta);
    }
    else if(data_type == 1)
    {
        result = profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(
            true, init_method, false, false, inLengths, reduceDims, reduceOpId, propagateNan,
            true, alpha, beta);
    }
    else if(data_type == 3)
    {
        result = profile_reduce_impl<int8_t, int8_t, int8_t>(
            true, init_method, false, false, inLengths, reduceDims, reduceOpId, propagateNan,
            true, alpha, beta);
    }
    else if(data_type == 5)
    {
        result = profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(
            true, init_method, false, false, inLengths, reduceDims, reduceOpId, propagateNan,
            true, alpha, beta);
    }
    else if(data_type == 6)
    {
        result = profile_reduce_impl<double, double, double>(
            true, init_method, false, false, inLengths, reduceDims, reduceOpId, propagateNan,
            true, alpha, beta);
    }

    return (result);
};

constexpr ReduceTensorOp reduceOpId = ReduceTensorOp::AMAX;
constexpr bool propagateNan         = false;

// main() is identical to the no-index driver's main() above, calling
// test_reduce_with_index instead and printing
// "test_reduce_with_index ..... SUCCESS/FAILURE".

// ---- added: gtest-based test_reduce_with_index ----

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

#include <getopt.h>

#include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>

using namespace ck;

struct ReduceParam
{
    bool do_verification{true};
    bool propagateNan{false};
    bool useIndex{false};
    bool time_kernel{false};
    bool do_dumpout{false};
    int init_method{2};
    float alpha{1.0f};
    float beta{0.0f};
    std::vector<size_t> inLengths{64, 4, 280, 82};
    std::vector<int> reduceDims{0, 1, 2, 3};
};

std::vector<std::vector<int>> SetGenericReduceDim()
{
    return {{0, 1, 2, 3}, {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3}, {0}, {1}, {2}, {3}};
}

template <typename T>
class ReduceWithIndexTest : public ::testing::Test
{
    protected:
    using InDataType  = std::tuple_element_t<0, T>;
    using AccDataType = std::tuple_element_t<1, T>;
    using OutDataType = std::tuple_element_t<2, T>;

    static std::vector<ReduceParam> params;

    static void SetUpTestSuite()
    {
        // set testcase variables
        ReduceParam set;
        const auto setReduceDim = SetGenericReduceDim();

        for(std::size_t i(0); i < setReduceDim.size(); ++i)
        {
            set.reduceDims = setReduceDim[i];
            params.emplace_back(set);
        }
    }

    template <ReduceTensorOp ReduceOpIdType>
    void Run()
    {
        for(auto param : this->params)
        {
            bool success = ck::profiler::profile_reduce_impl<InDataType, AccDataType, OutDataType>(
                param.do_verification,
                param.init_method,
                param.do_dumpout,
                param.time_kernel,
                param.inLengths,
                param.reduceDims,
                ReduceOpIdType,
                param.propagateNan,
                param.useIndex,
                param.alpha,
                param.beta);
            EXPECT_TRUE(success);
        }
    }
};

template <typename T>
std::vector<ReduceParam> ReduceWithIndexTest<T>::params = {};

using Reduce_float_types  = ::testing::Types<std::tuple<float, float, float>>;
using Reduce_double_types = ::testing::Types<std::tuple<double, double, double>>;
using Reduce_int8t_types  = ::testing::Types<std::tuple<int8_t, int8_t, int8_t>>;
using Reduce_half_types   = ::testing::Types<std::tuple<ck::half_t, ck::half_t, ck::half_t>>;
using Reduce_bhalf_float_Types = ::testing::Types<std::tuple<ck::bhalf_t, float, ck::bhalf_t>>;

template <typename TType>
class ReduceWithIndexFloat : public ReduceWithIndexTest<TType> {};

template <typename TType>
class ReduceWithIndexDouble : public ReduceWithIndexTest<TType> {};

template <typename TType>
class ReduceWithIndexInt8 : public ReduceWithIndexTest<TType> {};

template <typename TType>
class ReduceWithIndexHalf : public ReduceWithIndexTest<TType> {};

template <typename TType>
class ReduceWithIndexBHalfFloat : public ReduceWithIndexTest<TType> {};

TYPED_TEST_SUITE(ReduceWithIndexFloat, Reduce_float_types);
TYPED_TEST_SUITE(ReduceWithIndexDouble, Reduce_double_types);
TYPED_TEST_SUITE(ReduceWithIndexInt8, Reduce_int8t_types);
TYPED_TEST_SUITE(ReduceWithIndexHalf, Reduce_half_types);
TYPED_TEST_SUITE(ReduceWithIndexBHalfFloat, Reduce_bhalf_float_Types);

TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}

TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTestBHalfFloat_AMAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::AMAX>();
}

TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTestBHalfFloat_MIN)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MIN>();
}

TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTestBHalfFloat_MAX)
{
    // trigger Run() -> Generic
    this->template Run<ReduceTensorOp::MAX>();
}
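For comparison with the gtest flow above, this is roughly what one direct call into the profiler entry point looks like, mirroring the data_type == 0 branch of the removed driver; the standalone main() wrapper is a sketch, and the argument order follows the calls shown in this diff:

#include <cstddef>
#include <vector>

#include "profiler/profile_reduce_impl.hpp"

// One fp32 AMAX reduction over all four dimensions, with index reporting
// enabled (useIndex = true), as the old test_reduce_with_index driver did.
int main()
{
    std::vector<std::size_t> inLengths{64, 4, 280, 80};
    std::vector<int> reduceDims{0, 1, 2, 3};

    const bool ok = ck::profiler::profile_reduce_impl<float, float, float>(
        /*do_verification=*/true,
        /*init_method=*/2,
        /*do_dumpout=*/false,
        /*time_kernel=*/false,
        inLengths,
        reduceDims,
        ck::ReduceTensorOp::AMAX,
        /*propagateNan=*/false,
        /*useIndex=*/true,
        /*alpha=*/1.0f,
        /*beta=*/0.0f);

    return ok ? 0 : -1;
}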
@@ -13,6 +13,7 @@
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "test/smfmac_op/smfmac_op_util.hpp"
+#include "ck/host_utility/device_prop.hpp"

 using BF16 = ck::bhalf_t;
 using F16  = ck::half_t;

@@ -38,40 +39,43 @@ class TestSmfmac : public ::testing::Test
     void Run()
     {
         bool pass = true;
-        constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
-                                                                   Src1VecSize,
-                                                                   Src2Type,
-                                                                   Src2VecSize,
-                                                                   GPUAccType,
-                                                                   AccVecSize,
-                                                                   DstType,
-                                                                   M,
-                                                                   N,
-                                                                   K>;
-
-        constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);
-
-        ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}([&](auto i) {
-            pass &= ck::smfmac_op_util::TestSmfmac<
-                std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
-                Src1Type,
-                Src2Type,
-                DstType,
-                GPUAccType,
-                CPUAccType,
-                decltype(Row{}),
-                decltype(Row{}),
-                decltype(Row{}),
-                PassThrough,
-                PassThrough,
-                PassThrough,
-                AccVecSize,
-                M,
-                N,
-                K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
-        });
+        if(ck::get_device_name() == "gfx942")
+        {
+            constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
+                                                                       Src1VecSize,
+                                                                       Src2Type,
+                                                                       Src2VecSize,
+                                                                       GPUAccType,
+                                                                       AccVecSize,
+                                                                       DstType,
+                                                                       M,
+                                                                       N,
+                                                                       K>;
+
+            constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);
+
+            ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}(
+                [&](auto i) {
+                    pass &= ck::smfmac_op_util::TestSmfmac<
+                        std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
+                        Src1Type,
+                        Src2Type,
+                        DstType,
+                        GPUAccType,
+                        CPUAccType,
+                        decltype(Row{}),
+                        decltype(Row{}),
+                        decltype(Row{}),
+                        PassThrough,
+                        PassThrough,
+                        PassThrough,
+                        AccVecSize,
+                        M,
+                        N,
+                        K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
+                });
+        }

         EXPECT_TRUE(pass);
     }
 };
...
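One consequence of the guard above is that on anything other than gfx942 the body is skipped and the test still reports success. If a visible skip is preferred, GoogleTest's GTEST_SKIP() can express that; a minimal sketch (the test name here is invented for illustration):

#include <gtest/gtest.h>

#include "ck/host_utility/device_prop.hpp"

// Report non-gfx942 devices as skipped instead of silently passing;
// ck::get_device_name() comes from the header added in this hunk.
TEST(TestSmfmacDeviceGuard, SkipsOffGfx942)
{
    if(ck::get_device_name() != "gfx942")
    {
        GTEST_SKIP() << "smfmac instructions require gfx942";
    }

    // On gfx942 the real Run() body above would execute the smfmac
    // kernel container here.
    SUCCEED();
}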