Commit bd689f40 authored by illsilin's avatar illsilin
Browse files

merge from public repo

parents c160c6cf a94113a9
...@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name ...@@ -51,6 +51,21 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
#run grouped_fwd fp16 tests
export grouped_conv_fwd_log="perf_grouped_conv_fwd_fp16.log"
print_log_header $conv_fwd_log $env_type $branch $host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_log
#run grouped_bwd_data fp16 tests
export grouped_conv_bwd_data_log="perf_grouped_conv_bwd_data_fp16.log"
print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
#run grouped_bwd_weight fp16 tests
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight_fp16.log"
print_log_header $grouped_conv_bwd_weight_log $env_type $branch $host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1 $verify 1 0 1 256 1 2>&1 | tee -a $grouped_conv_bwd_weight_log
#run resnet50 tests #run resnet50 tests
export resnet256_log="perf_resnet50_N256.log" export resnet256_log="perf_resnet50_N256.log"
print_log_header $resnet256_log $env_type $branch $host_name print_log_header $resnet256_log $env_type $branch $host_name
......
#!/bin/bash
## The following will be used for CI
set -x
## for float
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 0 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 0 2
## for float64
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 6 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 6 2
## for float16
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 1 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 1 2
## for int8_t
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 3 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 3 2
## for bfloat16
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,2 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,1,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1,2,3 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 0 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 1 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 2 5 2
bin/test_reduce_with_index -D 64,4,280,82 -R 3 5 2
set +x
...@@ -68,11 +68,11 @@ function(add_test_executable TEST_NAME) ...@@ -68,11 +68,11 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list #only continue if there are some source files left on the list
if(ARGN) if(ARGN)
if(ARGN MATCHES "_xdl") if(ARGN MATCHES "_xdl")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
elseif(ARGN MATCHES "_wmma") elseif(ARGN MATCHES "_wmma")
list(REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) list(REMOVE_ITEM TEST_TARGETS gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
elseif(ARGN MATCHES "_smfmac") elseif(ARGN MATCHES "_smfmac")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a) list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
endif() endif()
set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_executable(${TEST_NAME} ${ARGN}) add_executable(${TEST_NAME} ${ARGN})
...@@ -149,11 +149,11 @@ function(add_gtest_executable TEST_NAME) ...@@ -149,11 +149,11 @@ function(add_gtest_executable TEST_NAME)
#only continue if there are some source files left on the list #only continue if there are some source files left on the list
if(ARGN) if(ARGN)
if(ARGN MATCHES "_xdl") if(ARGN MATCHES "_xdl")
list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
elseif(ARGN MATCHES "_wmma") elseif(ARGN MATCHES "_wmma")
list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030) list(REMOVE_ITEM TEST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
elseif(ARGN MATCHES "_smfmac") elseif(ARGN MATCHES "_smfmac")
list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a) list(REMOVE_ITEM TEST_TARGETS gfx1030 gfx1100 gfx1101 gfx1102 gfx1103 gfx908 gfx90a gfx1200 gfx1201)
endif() endif()
set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
add_executable(${TEST_NAME} ${ARGN}) add_executable(${TEST_NAME} ${ARGN})
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <string> #include <string>
...@@ -24,12 +24,12 @@ class TestConvUtil : public ::testing::Test ...@@ -24,12 +24,12 @@ class TestConvUtil : public ::testing::Test
128, 128,
192, 192,
256, 256,
std::vector<ck::index_t>(ndims, 3), std::vector<ck::long_index_t>(ndims, 3),
std::vector<ck::index_t>(ndims, 71), std::vector<ck::long_index_t>(ndims, 71),
std::vector<ck::index_t>(ndims, s), std::vector<ck::long_index_t>(ndims, s),
std::vector<ck::index_t>(ndims, d), std::vector<ck::long_index_t>(ndims, d),
std::vector<ck::index_t>(ndims, p), std::vector<ck::long_index_t>(ndims, p),
std::vector<ck::index_t>(ndims, p)); std::vector<ck::long_index_t>(ndims, p));
} }
protected: protected:
...@@ -48,35 +48,35 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D) ...@@ -48,35 +48,35 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
{ {
// stride 2, dilation 1, pad 1 // stride 2, dilation 1, pad 1
SetNDParams(1, 2, 1, 1); SetNDParams(1, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D.")); out_spatial_len, std::vector<ck::long_index_t>{36}, "Error: ConvParams 1D."));
// stride 1, dilation 1, pad 1 // stride 1, dilation 1, pad 1
SetNDParams(1, 1, 1, 1); SetNDParams(1, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}.")); out_spatial_len, std::vector<ck::long_index_t>{71}, "Error: ConvParams 1D stride {1}."));
// stride 2, dilation 1, pad 2 // stride 2, dilation 1, pad 2
SetNDParams(1, 2, 1, 2); SetNDParams(1, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37}, std::vector<ck::long_index_t>{37},
"Error: ConvParams 1D padding left/right {2}.")); "Error: ConvParams 1D padding left/right {2}."));
// stride 2, dilation 2, pad 2 // stride 2, dilation 2, pad 2
SetNDParams(1, 2, 2, 2); SetNDParams(1, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}.")); out_spatial_len, std::vector<ck::long_index_t>{36}, "Error: ConvParams 1D dilation {2}."));
// stride 3, dilation 2, pad 1 // stride 3, dilation 2, pad 1
SetNDParams(1, 3, 2, 1); SetNDParams(1, 3, 2, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE( EXPECT_TRUE(
ck::utils::check_err(out_spatial_len, ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23}, std::vector<ck::long_index_t>{23},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}.")); "Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
} }
...@@ -84,36 +84,38 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D) ...@@ -84,36 +84,38 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{ {
// stride 2, dilation 1, pad 1 // stride 2, dilation 1, pad 1
SetNDParams(2, 2, 1, 1); SetNDParams(2, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36}, std::vector<ck::long_index_t>{36, 36},
"Error: ConvParams 2D default constructor.")); "Error: ConvParams 2D default constructor."));
// stride 1, dilation 1, pad 1 // stride 1, dilation 1, pad 1
SetNDParams(2, 1, 1, 1); SetNDParams(2, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}.")); std::vector<ck::long_index_t>{71, 71},
"Error: ConvParams 2D stride {1,1}."));
// stride 2, dilation 1, pad 2 // stride 2, dilation 1, pad 2
SetNDParams(2, 2, 1, 2); SetNDParams(2, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37}, std::vector<ck::long_index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}.")); "Error: ConvParams 2D padding left/right {2,2}."));
// stride 2, dilation 2, pad 2 // stride 2, dilation 2, pad 2
SetNDParams(2, 2, 2, 2); SetNDParams(2, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}.")); std::vector<ck::long_index_t>{36, 36},
"Error: ConvParams 2D dilation {2,2}."));
// stride 3, dilation 2, pad 1 // stride 3, dilation 2, pad 1
SetNDParams(2, 3, 2, 1); SetNDParams(2, 3, 2, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE( EXPECT_TRUE(
ck::utils::check_err(out_spatial_len, ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23}, std::vector<ck::long_index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}.")); "Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
} }
...@@ -121,29 +123,29 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D) ...@@ -121,29 +123,29 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
{ {
// stride 2, dilation 1, pad 1 // stride 2, dilation 1, pad 1
SetNDParams(3, 2, 1, 1); SetNDParams(3, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths(); std::vector<ck::long_index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D.")); out_spatial_len, std::vector<ck::long_index_t>{36, 36, 36}, "Error: ConvParams 3D."));
// stride 1, dilation 1, pad 1 // stride 1, dilation 1, pad 1
SetNDParams(3, 1, 1, 1); SetNDParams(3, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{71, 71, 71}, std::vector<ck::long_index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}.")); "Error: ConvParams 3D stride {1, 1, 1}."));
// stride 2, dilation 1, pad 2 // stride 2, dilation 1, pad 2
SetNDParams(3, 2, 1, 2); SetNDParams(3, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37, 37}, std::vector<ck::long_index_t>{37, 37, 37},
"Error: ConvParams 3D padding left/right {2, 2, 2}.")); "Error: ConvParams 3D padding left/right {2, 2, 2}."));
// stride 2, dilation 2, pad 2 // stride 2, dilation 2, pad 2
SetNDParams(3, 2, 2, 2); SetNDParams(3, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len, EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36, 36}, std::vector<ck::long_index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}.")); "Error: ConvParams 3D dilation {2, 2, 2}."));
// stride 3, dilation 2, pad 1 // stride 3, dilation 2, pad 1
...@@ -151,6 +153,6 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D) ...@@ -151,6 +153,6 @@ TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
out_spatial_len = conv_params.GetOutputSpatialLengths(); out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err( EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, out_spatial_len,
std::vector<ck::index_t>{23, 23, 23}, std::vector<ck::long_index_t>{23, 23, 23},
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}.")); "Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
} }
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
...@@ -25,12 +25,13 @@ class TestGemmUniversal : public testing::Test ...@@ -25,12 +25,13 @@ class TestGemmUniversal : public testing::Test
using F32 = float; using F32 = float;
protected: protected:
using ALayout = std::tuple_element_t<0, Tuple>; using ALayout = std::tuple_element_t<0, Tuple>;
using BLayout = std::tuple_element_t<1, Tuple>; using BLayout = std::tuple_element_t<1, Tuple>;
using CLayout = Row; using CLayout = Row;
using ADataType = std::tuple_element_t<2, Tuple>; using ADataType = std::tuple_element_t<2, Tuple>;
using BDataType = std::tuple_element_t<3, Tuple>; using BDataType = std::tuple_element_t<3, Tuple>;
using CDataType = std::tuple_element_t<4, Tuple>; using ComputeDataType = std::tuple_element_t<4, Tuple>;
using CDataType = std::tuple_element_t<5, Tuple>;
public: public:
static constexpr bool verify_ = true; static constexpr bool verify_ = true;
...@@ -66,6 +67,7 @@ class TestGemmUniversal : public testing::Test ...@@ -66,6 +67,7 @@ class TestGemmUniversal : public testing::Test
{ {
bool pass = ck::profiler::profile_gemm_universal_impl<ADataType, bool pass = ck::profiler::profile_gemm_universal_impl<ADataType,
BDataType, BDataType,
ComputeDataType,
F32, F32,
CDataType, CDataType,
ALayout, ALayout,
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple> #include <tuple>
...@@ -41,16 +41,29 @@ class TestGemmUniversal_MK_NK ...@@ -41,16 +41,29 @@ class TestGemmUniversal_MK_NK
}; };
// clang-format off // clang-format off
using KernelTypes = ::testing::Types< using KernelTypes_MK_KN = ::testing::Types<
// ADataType, BDataType, CDataType // ADataType, BDataType, ComputeDataType, CDataType
std::tuple< F16, F16, F16>, std::tuple< F16, F16, F16, F16>,
std::tuple< F16, F8, F16>, #if (defined CK_ENABLE_FP8)
std::tuple< F8, F16, F16>, std::tuple< F16, F8, F16, F16>,
std::tuple< BF16, BF16, BF16> std::tuple< F8, F16, F16, F16>,
std::tuple< F8, F8, F8, BF16>,
#endif
std::tuple< BF16, BF16, BF16, BF16>
>;
using KernelTypes_MK_NK = ::testing::Types<
// ADataType, BDataType, ComputeDataType, CDataType
std::tuple< F16, F16, F16, F16>,
#if (defined CK_ENABLE_FP8)
std::tuple< F16, F8, F16, F16>,
std::tuple< F8, F16, F16, F16>,
std::tuple< F8, F8, F8, BF16>,
#endif
std::tuple< BF16, BF16, BF16, BF16>
>; >;
// clang-format on // clang-format on
TYPED_TEST_SUITE(TestGemmUniversal_MK_KN, KernelTypes); TYPED_TEST_SUITE(TestGemmUniversal_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_MK_NK, KernelTypes); TYPED_TEST_SUITE(TestGemmUniversal_MK_NK, KernelTypes_MK_NK);
#include "test_gemm_universal_ut_cases.inc" #include "test_gemm_universal_ut_cases.inc"
...@@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11") ...@@ -7,6 +7,12 @@ if(GPU_TARGETS MATCHES "gfx9" OR GPU_TARGETS MATCHES "gfx11")
endif() endif()
endif() endif()
if(GPU_TARGETS MATCHES "gfx9")
add_executable(test_grouped_convnd_fwd_large_cases_xdl test_grouped_convnd_fwd_large_cases_xdl.cpp)
target_compile_options(test_grouped_convnd_fwd_large_cases_xdl PRIVATE -Wno-global-constructors -Wno-undef)
target_link_libraries(test_grouped_convnd_fwd_large_cases_xdl PRIVATE gtest_main getopt::getopt utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
endif()
add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp) add_gtest_executable(test_grouped_convnd_fwd_multi_ab_interface test_grouped_convnd_fwd_multi_ab_interface.cpp)
if(result EQUAL 0) if(result EQUAL 0)
target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility) target_link_libraries(test_grouped_convnd_fwd_multi_ab_interface PRIVATE utility)
......
...@@ -17,6 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test ...@@ -17,6 +17,7 @@ class TestGroupedConvndFwd : public ::testing::Test
using InLayout = std::tuple_element_t<1, Tuple>; using InLayout = std::tuple_element_t<1, Tuple>;
using WeiLayout = std::tuple_element_t<2, Tuple>; using WeiLayout = std::tuple_element_t<2, Tuple>;
using OutLayout = std::tuple_element_t<3, Tuple>; using OutLayout = std::tuple_element_t<3, Tuple>;
using IndexType = ck::index_t;
std::vector<ck::utils::conv::ConvParam> conv_params; std::vector<ck::utils::conv::ConvParam> conv_params;
...@@ -33,7 +34,10 @@ class TestGroupedConvndFwd : public ::testing::Test ...@@ -33,7 +34,10 @@ class TestGroupedConvndFwd : public ::testing::Test
OutLayout, OutLayout,
DataType, DataType,
DataType, DataType,
DataType>( DataType,
DataType,
DataType,
IndexType>(
true, // do_verification true, // do_verification
1, // init_method: integer value 1, // init_method: integer value
false, // do_log false, // do_log
...@@ -69,8 +73,6 @@ using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK> ...@@ -69,8 +73,6 @@ using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>
std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>, std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>; std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>;
using KernelTypes2dLargeCases = ::testing::Types<std::tuple<float, NHWGC, GKYXC, NHWGK>>;
template <typename Tuple> template <typename Tuple>
class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple> class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple>
{ {
...@@ -86,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple> ...@@ -86,15 +88,9 @@ class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{ {
}; };
template <typename Tuple>
class TestGroupedConvndFwd2dLargeCases : public TestGroupedConvndFwd<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d); TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d);
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d); TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d); TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
TYPED_TEST_SUITE(TestGroupedConvndFwd2dLargeCases, KernelTypes2dLargeCases);
TYPED_TEST(TestGroupedConvndFwd1d, Test1D) TYPED_TEST(TestGroupedConvndFwd1d, Test1D)
{ {
...@@ -104,6 +100,7 @@ TYPED_TEST(TestGroupedConvndFwd1d, Test1D) ...@@ -104,6 +100,7 @@ TYPED_TEST(TestGroupedConvndFwd1d, Test1D)
this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}}); this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
this->conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 96, 1, 1, 1, {3}, {512}, {1}, {1}, {1}, {1}});
this->template Run<1>(); this->template Run<1>();
} }
...@@ -119,6 +116,8 @@ TYPED_TEST(TestGroupedConvndFwd2d, Test2D) ...@@ -119,6 +116,8 @@ TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
this->conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); this->conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); this->conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); this->conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back(
{2, 96, 1, 1, 1, {3, 3}, {120, 160}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->template Run<2>(); this->template Run<2>();
} }
...@@ -137,13 +136,7 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D) ...@@ -137,13 +136,7 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}); {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back( this->conv_params.push_back(
{3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}); {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
TYPED_TEST(TestGroupedConvndFwd2dLargeCases, Test2DLargeCases)
{
// Case larger than 2GB
this->conv_params.push_back( this->conv_params.push_back(
{2, 1, 64, 4, 192, {2, 2}, {224, 224}, {224, 224}, {0, 0}, {0, 0}, {0, 0}}); {3, 96, 1, 1, 1, {3, 3, 3}, {4, 30, 160}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<2>(); this->template Run<3>();
} }
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
protected:
using DataType = std::tuple_element_t<0, Tuple>;
using InLayout = std::tuple_element_t<1, Tuple>;
using WeiLayout = std::tuple_element_t<2, Tuple>;
using OutLayout = std::tuple_element_t<3, Tuple>;
using IndexType = ck::long_index_t;
std::vector<ck::utils::conv::ConvParam> conv_params;
template <ck::index_t NDimSpatial>
void Run()
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
{
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
}
EXPECT_TRUE(pass);
}
};
using namespace ck::tensor_layout::convolution;
using KernelTypes2d = ::testing::Types<std::tuple<float, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>>;
template <typename Tuple>
class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
{
};
template <typename Tuple>
class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
{
// Case larger than 2GB
this->conv_params.push_back(
{2, 1, 128, 4, 192, {2, 2}, {224, 224}, {224, 224}, {1, 1}, {0, 0}, {0, 0}});
// With supported NumGroupsToMerge > 1
this->conv_params.push_back(
{2, 32, 64, 1, 1, {2, 2}, {672, 672}, {672, 672}, {1, 1}, {0, 0}, {0, 0}});
// When image is larger than 2GB
this->conv_params.push_back(
{2, 2, 2, 128, 128, {3, 3}, {4096, 2048}, {300, 300}, {3, 3}, {1, 1}, {1, 1}});
this->template Run<2>();
}
TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
// Case larger than 2GB
this->conv_params.push_back({3,
1,
128,
4,
192,
{2, 2, 2},
{2, 224, 224},
{1, 224, 224},
{1, 1, 1},
{0, 0, 0},
{0, 0, 0}});
// With supported NumGroupsToMerge > 1
this->conv_params.push_back({3,
32,
64,
1,
1,
{2, 2, 2},
{360, 2, 672},
{360, 2, 672},
{1, 1, 1},
{0, 0, 0},
{0, 0, 0}});
// When image is larger than 2GB
this->conv_params.push_back({3,
1,
2,
128,
128,
{3, 1, 3},
{900, 2, 2048},
{300, 1, 300},
{3, 2, 3},
{1, 1, 1},
{1, 1, 1}});
this->template Run<3>();
}
add_test_executable(test_reduce_no_index reduce_no_index.cpp) add_gtest_executable(test_reduce_no_index reduce_no_index.cpp)
add_test_executable(test_reduce_with_index reduce_with_index.cpp) add_gtest_executable(test_reduce_with_index reduce_with_index.cpp)
target_link_libraries(test_reduce_no_index PRIVATE utility device_reduce_instance) target_link_libraries(test_reduce_no_index PRIVATE utility device_reduce_instance)
target_link_libraries(test_reduce_with_index PRIVATE utility device_reduce_instance) target_link_libraries(test_reduce_with_index PRIVATE utility device_reduce_instance)
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <getopt.h> #include <getopt.h>
#include "ck/library/utility/host_common_util.hpp" #include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp" #include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>
using namespace ck; using namespace ck;
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'}, struct ReduceParam
{"reduceDimensions", required_argument, nullptr, 'R'}, {
{"scales", required_argument, nullptr, 'S'}, bool do_verification{true};
{"help", no_argument, nullptr, '?'}, bool propagateNan{false};
{nullptr, 0, nullptr, 0}}; bool useIndex{false};
bool time_kernel{false};
bool do_dumpout{false};
int init_method{2};
float alpha{1.0f};
float beta{0.0f};
std::vector<size_t> inLengths{64, 4, 280, 82};
std::vector<int> reduceDims{0, 1, 2, 3};
};
class SimpleAppArgs std::vector<std::vector<int>> SetGenericReduceDim()
{ {
private: return {{0, 1, 2, 3}, {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3}, {0}, {1}, {2}, {3}};
int option_index = 0; }
public: template <typename T>
std::vector<size_t> inLengths; class ReduceWithIndexTest : public ::testing::Test
std::vector<int> reduceDims; {
std::vector<float> scales; protected:
using InDataType = std::tuple_element_t<0, T>;
using AccDataType = std::tuple_element_t<1, T>;
using OutDataType = std::tuple_element_t<2, T>;
int data_type; static std::vector<ReduceParam> params;
int init_method = 1;
public: static void SetUpTestSuite()
void show_usage(const char* cmd)
{ {
std::cout << "Usage of " << cmd << std::endl; // set testcase variables
std::cout << "--inLengths or -D, comma separated list of input tensor dimension lengths " ReduceParam set;
"(only 4-d tensor supported)" const auto setReduceDim = SetGenericReduceDim();
<< std::endl;
std::cout << "--reduceDimensions or -R comma seperated list of dimension indexes to reduce "
"(only 1 or 3 or 4 dimensions supported)"
<< std::endl;
std::cout << "--scales or -S, comma separated two float values for alpha and beta"
<< std::endl;
std::cout << "Arg1 -- data type (0: fp16, 1: fp32, 3: int8, 5: bp16, 6: fp64)" << std::endl;
std::cout << "Arg2 -- init method(0=no init, 1=single integer value, 2=scope integer "
"value, 3=decimal value)"
<< std::endl;
};
int processArgs(int argc, char* argv[])
{
using ck::host_common::getTypeValuesFromString;
int ch;
while(1) for(std::size_t i(0); i < setReduceDim.size(); ++i)
{ {
ch = getopt_long(argc, argv, "D:R:S:", long_options, &option_index); set.reduceDims = setReduceDim[i];
if(ch == -1) params.emplace_back(set);
break; }
switch(ch) }
{
case 'D': template <ReduceTensorOp ReduceOpIdType>
if(!optarg) void Run()
throw std::runtime_error("Invalid option format!"); {
for(auto param : this->params)
inLengths = getTypeValuesFromString<size_t>(optarg);
break;
case 'R':
if(!optarg)
throw std::runtime_error("Invalid option format!");
reduceDims = getTypeValuesFromString<int>(optarg);
break;
case 'S':
if(!optarg)
throw std::runtime_error("Invalid option format!");
scales = getTypeValuesFromString<float>(optarg);
break;
case '?':
if(std::string(long_options[option_index].name) == "help")
{
show_usage(argv[0]);
return (-1);
};
break;
default: show_usage(argv[0]); return (-1);
};
};
if(optind + 2 > argc)
throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");
data_type = std::atoi(argv[optind++]);
init_method = std::atoi(argv[optind]);
if(scales.empty())
{ {
scales.push_back(1.0f); bool success = ck::profiler::profile_reduce_impl<InDataType, AccDataType, OutDataType>(
scales.push_back(0.0f); param.do_verification,
}; param.init_method,
param.do_dumpout,
param.time_kernel,
param.inLengths,
param.reduceDims,
ReduceOpIdType,
param.propagateNan,
param.useIndex,
param.alpha,
param.beta);
EXPECT_TRUE(success);
}
}
};
if(inLengths.size() != 4 || template <typename T>
(reduceDims.size() != 1 && reduceDims.size() != 3 && reduceDims.size() != 4)) std::vector<ReduceParam> ReduceWithIndexTest<T>::params = {};
return (-1);
if(data_type != 0 && data_type != 1 && data_type != 3 && data_type != 5 && data_type != 6) using Reduce_float_types = ::testing::Types<std::tuple<float, float, float>>;
return (-1); using Reduce_double_types = ::testing::Types<std::tuple<double, double, double>>;
using Reduce_int8t_types = ::testing::Types<std::tuple<int8_t, int8_t, int8_t>>;
using Reduce_half_types = ::testing::Types<std::tuple<ck::half_t, ck::half_t, ck::half_t>>;
using Reduce_bhalf_float_Types = ::testing::Types<std::tuple<ck::bhalf_t, float, ck::bhalf_t>>;
return (0); template <typename TType>
}; class ReduceWithNoIndexFloat : public ReduceWithIndexTest<TType>
{
}; };
bool test_reduce_no_index(int data_type, template <typename TType>
int init_method, class ReduceWithNoIndexDouble : public ReduceWithIndexTest<TType>
std::vector<int> reduceDims,
std::vector<size_t> inLengths,
ReduceTensorOp reduceOpId,
bool propagateNan,
float alpha,
float beta)
{ {
using ck::profiler::profile_reduce_impl; };
bool result = true; template <typename TType>
class ReduceWithNoIndexInt8 : public ReduceWithIndexTest<TType>
{
};
if(data_type == 0) template <typename TType>
{ class ReduceWithNoIndexHalf : public ReduceWithIndexTest<TType>
result = profile_reduce_impl<float, float, float>(true, {
init_method, };
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
false,
alpha,
beta);
}
else if(data_type == 1)
{
result = profile_reduce_impl<ck::half_t, float, ck::half_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
false,
alpha,
beta);
}
else if(data_type == 3)
{
result = profile_reduce_impl<int8_t, int32_t, int8_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
false,
alpha,
beta);
}
else if(data_type == 5)
{
result = profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
false,
alpha,
beta);
}
else if(data_type == 6)
{
result = profile_reduce_impl<double, double, double>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
false,
alpha,
beta);
}
return (result); template <typename TType>
class ReduceWithNoIndexBHalfFloat : public ReduceWithIndexTest<TType>
{
}; };
constexpr ReduceTensorOp reduceOpId = ReduceTensorOp::AVG; TYPED_TEST_SUITE(ReduceWithNoIndexFloat, Reduce_float_types);
constexpr bool propagateNan = false; TYPED_TEST_SUITE(ReduceWithNoIndexDouble, Reduce_double_types);
TYPED_TEST_SUITE(ReduceWithNoIndexInt8, Reduce_int8t_types);
TYPED_TEST_SUITE(ReduceWithNoIndexHalf, Reduce_half_types);
TYPED_TEST_SUITE(ReduceWithNoIndexBHalfFloat, Reduce_bhalf_float_Types);
int main(int argc, char* argv[]) TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_AMAX)
{ {
SimpleAppArgs args; // trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
bool result = true; TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
if(argc == 1) TYPED_TEST(ReduceWithNoIndexFloat, ReduceWithNoIndexTestFloat_MAX)
{ {
int data_type = 1; // trigger Run() -> Generic
int init_method = 2; this->template Run<ReduceTensorOp::MAX>();
std::vector<size_t> inLengths{64, 4, 280, 80}; }
std::vector<std::vector<int>> v_reduceDims{
{0, 1, 2, 3}, {0, 1, 2}, {1, 2, 3}, {0, 1, 3}, {0, 2, 3}, {0}, {1}, {2}, {3}}; TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_AMAX)
{
for(auto& reduceDims : v_reduceDims) // trigger Run() -> Generic
result = result && test_reduce_no_index(data_type, this->template Run<ReduceTensorOp::AMAX>();
init_method, }
reduceDims,
inLengths, TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_MIN)
reduceOpId, {
propagateNan, // trigger Run() -> Generic
1.0f, this->template Run<ReduceTensorOp::MIN>();
0.0f); }
}
else TYPED_TEST(ReduceWithNoIndexDouble, ReduceWithNoIndexTestDouble_MAX)
{ {
if(args.processArgs(argc, argv) < 0) // trigger Run() -> Generic
{ this->template Run<ReduceTensorOp::MAX>();
throw std::runtime_error( }
"Invalid input arguments, test_reduce_no_index could not be executed!");
}; TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_AMAX)
{
result = test_reduce_no_index(args.data_type, // trigger Run() -> Generic
args.init_method, this->template Run<ReduceTensorOp::AMAX>();
args.reduceDims, }
args.inLengths,
reduceOpId, TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_MIN)
propagateNan, {
args.scales[0], // trigger Run() -> Generic
args.scales[1]); this->template Run<ReduceTensorOp::MIN>();
} }
std::cout << "test_reduce_no_index ..... " << (result ? "SUCCESS" : "FAILURE") << std::endl; TYPED_TEST(ReduceWithNoIndexInt8, ReduceWithNoIndexTestInt8_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithNoIndexHalf, ReduceWithNoIndexTestHalf_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTesBtHalfFloat_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
return (result ? 0 : -1); TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTestBHalfFloat_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithNoIndexBHalfFloat, ReduceWithNoIndexTestBHalfFloat_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
} }
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <getopt.h> #include <getopt.h>
#include "ck/library/utility/host_common_util.hpp" #include "ck/library/utility/host_common_util.hpp"
#include "profiler/profile_reduce_impl.hpp" #include "profiler/profile_reduce_impl.hpp"
#include <gtest/gtest.h>
using namespace ck; using namespace ck;
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'}, struct ReduceParam
{"reduceDimensions", required_argument, nullptr, 'R'}, {
{"scales", required_argument, nullptr, 'S'}, bool do_verification{true};
{"help", no_argument, nullptr, '?'}, bool propagateNan{false};
{nullptr, 0, nullptr, 0}}; bool useIndex{false};
bool time_kernel{false};
bool do_dumpout{false};
int init_method{2};
float alpha{1.0f};
float beta{0.0f};
std::vector<size_t> inLengths{64, 4, 280, 82};
std::vector<int> reduceDims{0, 1, 2, 3};
};
class SimpleAppArgs std::vector<std::vector<int>> SetGenericReduceDim()
{ {
private: return {{0, 1, 2, 3}, {0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3}, {0}, {1}, {2}, {3}};
int option_index = 0; }
public: template <typename T>
std::vector<size_t> inLengths; class ReduceWithIndexTest : public ::testing::Test
std::vector<int> reduceDims; {
std::vector<float> scales; protected:
using InDataType = std::tuple_element_t<0, T>;
using AccDataType = std::tuple_element_t<1, T>;
using OutDataType = std::tuple_element_t<2, T>;
int data_type; static std::vector<ReduceParam> params;
int init_method = 1;
public: static void SetUpTestSuite()
void show_usage(const char* cmd)
{ {
std::cout << "Usage of " << cmd << std::endl; // set testcase variables
std::cout << "--inLengths or -D, comma separated list of input tensor dimension lengths " ReduceParam set;
"(only 4-d tensor supported)" const auto setReduceDim = SetGenericReduceDim();
<< std::endl;
std::cout << "--reduceDimensions or -R comma seperated list of dimension indexes to reduce "
"(only 1 or 3 or 4 dimensions supported)"
<< std::endl;
std::cout << "--scales or -S, comma separated two float values for alpha and beta"
<< std::endl;
std::cout << "Arg1 -- data type (1: fp32, 3: int8, 5: bp16, 6: fp64)" << std::endl;
std::cout << "Arg2 -- init method(0=no init, 1=single integer value, 2=scope integer "
"value, 3=decimal value)"
<< std::endl;
};
int processArgs(int argc, char* argv[])
{
using ck::host_common::getTypeValuesFromString;
int ch;
while(1) for(std::size_t i(0); i < setReduceDim.size(); ++i)
{ {
ch = getopt_long(argc, argv, "D:R:S:", long_options, &option_index); set.reduceDims = setReduceDim[i];
if(ch == -1) params.emplace_back(set);
break; }
switch(ch) }
{
case 'D': template <ReduceTensorOp ReduceOpIdType>
if(!optarg) void Run()
throw std::runtime_error("Invalid option format!"); {
for(auto param : this->params)
inLengths = getTypeValuesFromString<size_t>(optarg);
break;
case 'R':
if(!optarg)
throw std::runtime_error("Invalid option format!");
reduceDims = getTypeValuesFromString<int>(optarg);
break;
case 'S':
if(!optarg)
throw std::runtime_error("Invalid option format!");
scales = getTypeValuesFromString<float>(optarg);
break;
case '?':
if(std::string(long_options[option_index].name) == "help")
{
show_usage(argv[0]);
return (-1);
};
break;
default: show_usage(argv[0]); return (-1);
};
};
if(optind + 2 > argc)
throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");
data_type = std::atoi(argv[optind++]);
init_method = std::atoi(argv[optind]);
if(scales.empty())
{ {
scales.push_back(1.0f); bool success = ck::profiler::profile_reduce_impl<InDataType, AccDataType, OutDataType>(
scales.push_back(0.0f); param.do_verification,
}; param.init_method,
param.do_dumpout,
param.time_kernel,
param.inLengths,
param.reduceDims,
ReduceOpIdType,
param.propagateNan,
param.useIndex,
param.alpha,
param.beta);
EXPECT_TRUE(success);
}
}
};
if(inLengths.size() != 4 || template <typename T>
(reduceDims.size() != 1 && reduceDims.size() != 3 && reduceDims.size() != 4)) std::vector<ReduceParam> ReduceWithIndexTest<T>::params = {};
return (-1);
if(data_type != 0 && data_type != 1 && data_type != 3 && data_type != 5 && data_type != 6) using Reduce_float_types = ::testing::Types<std::tuple<float, float, float>>;
return (-1); using Reduce_double_types = ::testing::Types<std::tuple<double, double, double>>;
using Reduce_int8t_types = ::testing::Types<std::tuple<int8_t, int8_t, int8_t>>;
using Reduce_half_types = ::testing::Types<std::tuple<ck::half_t, ck::half_t, ck::half_t>>;
using Reduce_bhalf_float_Types = ::testing::Types<std::tuple<ck::bhalf_t, float, ck::bhalf_t>>;
return (0); template <typename TType>
}; class ReduceWithIndexFloat : public ReduceWithIndexTest<TType>
{
}; };
bool test_reduce_with_index(int data_type, template <typename TType>
int init_method, class ReduceWithIndexDouble : public ReduceWithIndexTest<TType>
std::vector<int> reduceDims,
std::vector<size_t> inLengths,
ReduceTensorOp reduceOpId,
bool propagateNan,
float alpha,
float beta)
{ {
using ck::profiler::profile_reduce_impl; };
bool result = true; template <typename TType>
class ReduceWithIndexInt8 : public ReduceWithIndexTest<TType>
{
};
if(data_type == 0) template <typename TType>
{ class ReduceWithIndexHalf : public ReduceWithIndexTest<TType>
result = profile_reduce_impl<float, float, float>(true, {
init_method, };
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
true,
alpha,
beta);
}
else if(data_type == 1)
{
result = profile_reduce_impl<ck::half_t, ck::half_t, ck::half_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
true,
alpha,
beta);
}
else if(data_type == 3)
{
result = profile_reduce_impl<int8_t, int8_t, int8_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
true,
alpha,
beta);
}
else if(data_type == 5)
{
result = profile_reduce_impl<ck::bhalf_t, float, ck::bhalf_t>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
true,
alpha,
beta);
}
else if(data_type == 6)
{
result = profile_reduce_impl<double, double, double>(true,
init_method,
false,
false,
inLengths,
reduceDims,
reduceOpId,
propagateNan,
true,
alpha,
beta);
}
return (result); template <typename TType>
class ReduceWithIndexBHalfFloat : public ReduceWithIndexTest<TType>
{
}; };
constexpr ReduceTensorOp reduceOpId = ReduceTensorOp::AMAX; TYPED_TEST_SUITE(ReduceWithIndexFloat, Reduce_float_types);
constexpr bool propagateNan = false; TYPED_TEST_SUITE(ReduceWithIndexDouble, Reduce_double_types);
TYPED_TEST_SUITE(ReduceWithIndexInt8, Reduce_int8t_types);
TYPED_TEST_SUITE(ReduceWithIndexHalf, Reduce_half_types);
TYPED_TEST_SUITE(ReduceWithIndexBHalfFloat, Reduce_bhalf_float_Types);
int main(int argc, char* argv[]) TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_AMAX)
{ {
SimpleAppArgs args; // trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
bool result = true; TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
if(argc == 1) TYPED_TEST(ReduceWithIndexFloat, ReduceWithIndexTestFloat_MAX)
{ {
int data_type = 1; // trigger Run() -> Generic
int init_method = 2; this->template Run<ReduceTensorOp::MAX>();
std::vector<size_t> inLengths{64, 4, 280, 80}; }
std::vector<std::vector<int>> v_reduceDims{
{0, 1, 2, 3}, {0, 1, 2}, {1, 2, 3}, {0, 1, 3}, {0, 2, 3}, {0}, {1}, {2}, {3}}; TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_AMAX)
{
for(auto& reduceDims : v_reduceDims) // trigger Run() -> Generic
result = result && test_reduce_with_index(data_type, this->template Run<ReduceTensorOp::AMAX>();
init_method, }
reduceDims,
inLengths, TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_MIN)
reduceOpId, {
propagateNan, // trigger Run() -> Generic
1.0f, this->template Run<ReduceTensorOp::MIN>();
0.0f); }
}
else TYPED_TEST(ReduceWithIndexDouble, ReduceWithIndexTestDouble_MAX)
{ {
if(args.processArgs(argc, argv) < 0) // trigger Run() -> Generic
{ this->template Run<ReduceTensorOp::MAX>();
throw std::runtime_error( }
"Invalid input arguments, test_reduce_with_index could not be executed!");
}; TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_AMAX)
{
result = test_reduce_with_index(args.data_type, // trigger Run() -> Generic
args.init_method, this->template Run<ReduceTensorOp::AMAX>();
args.reduceDims, }
args.inLengths,
reduceOpId, TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_MIN)
propagateNan, {
args.scales[0], // trigger Run() -> Generic
args.scales[1]); this->template Run<ReduceTensorOp::MIN>();
} }
std::cout << "test_reduce_with_index ..... " << (result ? "SUCCESS" : "FAILURE") << std::endl; TYPED_TEST(ReduceWithIndexInt8, ReduceWithIndexTestInt8_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithIndexHalf, ReduceWithIndexTestHalf_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
}
TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTesBtHalfFloat_AMAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::AMAX>();
}
return (result ? 0 : -1); TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTestBHalfFloat_MIN)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MIN>();
}
TYPED_TEST(ReduceWithIndexBHalfFloat, ReduceWithIndexTestBHalfFloat_MAX)
{
// trigger Run() -> Generic
this->template Run<ReduceTensorOp::MAX>();
} }
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "test/smfmac_op/smfmac_op_util.hpp" #include "test/smfmac_op/smfmac_op_util.hpp"
#include "ck/host_utility/device_prop.hpp"
using BF16 = ck::bhalf_t; using BF16 = ck::bhalf_t;
using F16 = ck::half_t; using F16 = ck::half_t;
...@@ -38,40 +39,43 @@ class TestSmfmac : public ::testing::Test ...@@ -38,40 +39,43 @@ class TestSmfmac : public ::testing::Test
void Run() void Run()
{ {
bool pass = true; bool pass = true;
constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type, if(ck::get_device_name() == "gfx942")
Src1VecSize, {
Src2Type, constexpr auto matmul_default = ck::smfmac_op_util::matmul<Src1Type,
Src2VecSize, Src1VecSize,
GPUAccType, Src2Type,
AccVecSize, Src2VecSize,
DstType, GPUAccType,
M, AccVecSize,
N, DstType,
K>; M,
N,
K>;
constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default); constexpr auto smfmac_kernel_container = std::make_tuple(matmul_default);
ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}([&](auto i) {
pass &= ck::smfmac_op_util::TestSmfmac<
std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
Src1Type,
Src2Type,
DstType,
GPUAccType,
CPUAccType,
decltype(Row{}),
decltype(Row{}),
decltype(Row{}),
PassThrough,
PassThrough,
PassThrough,
AccVecSize,
M,
N,
K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
});
ck::static_for<0, std::tuple_size_v<decltype(smfmac_kernel_container)>, 1>{}(
[&](auto i) {
pass &= ck::smfmac_op_util::TestSmfmac<
std::tuple_element_t<i.value, decltype(smfmac_kernel_container)>,
Src1Type,
Src2Type,
DstType,
GPUAccType,
CPUAccType,
decltype(Row{}),
decltype(Row{}),
decltype(Row{}),
PassThrough,
PassThrough,
PassThrough,
AccVecSize,
M,
N,
K>{}(std::get<ck::Number<i>{}>(smfmac_kernel_container));
});
}
EXPECT_TRUE(pass); EXPECT_TRUE(pass);
} }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment