Commit cfcdb03e authored by Bartlomiej Wroblewski

Merge remote-tracking branch 'origin/develop' into bwroblew/contraction_mixed_dt

parents a30c626b e2243a4d
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>

#include "profiler/profile_conv_tensor_rearrange_impl.hpp"
#include "profiler_operation_registry.hpp"

namespace {

enum struct RearrangeOp
{
    ImageToColumn, // 0
    ColumnToImage, // 1
};

enum struct ConvLayout
{
    NHWC, // 0
@@ -24,8 +30,8 @@ enum struct DataType
    INT8_INT8, // 3
};

#define OP_NAME "conv_tensor_rearrange"
#define OP_DESC "Conv Tensor Rearrange"

static void print_helper_msg()
{
@@ -41,16 +47,17 @@ static void print_helper_msg()
              << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
              << "arg6: print tensor value (0: no; 1: yes)\n"
              << "arg7: time kernel (0: no, 1: yes)\n"
              << "arg8: operation type (0: ImageToColumn, 1: ColumnToImage)\n"
              << ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl;
    // clang-format on
}

} // namespace

int profile_conv_tensor_rearrange(int argc, char* argv[])
{
    // 9 for control, 1 for num_dim_spatial
    if(argc < 10)
    {
        print_helper_msg();
        return 1;
@@ -62,16 +69,17 @@ int profile_image_to_column(int argc, char* argv[])
    const int init_method     = std::stoi(argv[5]);
    const bool do_log         = std::stoi(argv[6]);
    const bool time_kernel    = std::stoi(argv[7]);
    const auto rearrange_op   = static_cast<RearrangeOp>(std::stoi(argv[8]));
    const int num_dim_spatial = std::stoi(argv[9]);

    // 9 for control, 1 for num_dim_spatial, 4 for G/N/K/C, and 6 * num_dim_spatial
    if(argc != 9 + 1 + 4 + 6 * num_dim_spatial)
    {
        print_helper_msg();
        return 1;
    }

    const auto params = ck::utils::conv::parse_conv_param(num_dim_spatial, 10, argv);

    using F32 = float;
    using F16 = ck::half_t;
@@ -79,12 +87,17 @@ int profile_image_to_column(int argc, char* argv[])
    using INT8 = int8_t;

    using namespace ck::tensor_layout::convolution;
    using namespace ck::conv_tensor_rearrange_op;

    constexpr auto I1 = ck::Number<1>{};
    constexpr auto I2 = ck::Number<2>{};
    constexpr auto I3 = ck::Number<3>{};

    auto profile = [&](auto num_dim_spatial_tmp,
                       auto in_layout,
                       auto in_type,
                       auto out_type,
                       auto rearrange_op_type) {
        constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value;

        using InLayout = decltype(in_layout);
@@ -92,78 +105,147 @@ int profile_image_to_column(int argc, char* argv[])
        using InDataType  = decltype(in_type);
        using OutDataType = decltype(out_type);
        using Op          = decltype(rearrange_op_type);

        bool pass = ck::profiler::
            profile_conv_tensor_rearrange_impl<NDimSpatial, InLayout, InDataType, OutDataType, Op>(
                do_verification, init_method, do_log, time_kernel, params);

        return pass ? 0 : 1;
    };

    // Image To Column
    if(rearrange_op == RearrangeOp::ImageToColumn)
    {
        // NHWC
        if(layout == ConvLayout::NHWC)
        {
            if(num_dim_spatial == 1)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I1, GNWC{}, F32{}, F32{}, ImageToColumn{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I1, GNWC{}, F16{}, F16{}, ImageToColumn{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I1, GNWC{}, BF16{}, BF16{}, ImageToColumn{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I1, GNWC{}, INT8{}, INT8{}, ImageToColumn{});
                }
            }
            else if(num_dim_spatial == 2)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I2, GNHWC{}, F32{}, F32{}, ImageToColumn{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I2, GNHWC{}, F16{}, F16{}, ImageToColumn{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I2, GNHWC{}, BF16{}, BF16{}, ImageToColumn{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I2, GNHWC{}, INT8{}, INT8{}, ImageToColumn{});
                }
            }
            else if(num_dim_spatial == 3)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I3, GNDHWC{}, F32{}, F32{}, ImageToColumn{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I3, GNDHWC{}, F16{}, F16{}, ImageToColumn{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I3, GNDHWC{}, BF16{}, BF16{}, ImageToColumn{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I3, GNDHWC{}, INT8{}, INT8{}, ImageToColumn{});
                }
            }
        }
    }
    else if(rearrange_op == RearrangeOp::ColumnToImage)
    {
        // NHWC
        if(layout == ConvLayout::NHWC)
        {
            if(num_dim_spatial == 1)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I1, GNWC{}, F32{}, F32{}, ColumnToImage{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I1, GNWC{}, F16{}, F16{}, ColumnToImage{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I1, GNWC{}, BF16{}, BF16{}, ColumnToImage{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I1, GNWC{}, INT8{}, INT8{}, ColumnToImage{});
                }
            }
            else if(num_dim_spatial == 2)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I2, GNHWC{}, F32{}, F32{}, ColumnToImage{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I2, GNHWC{}, F16{}, F16{}, ColumnToImage{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I2, GNHWC{}, BF16{}, BF16{}, ColumnToImage{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I2, GNHWC{}, INT8{}, INT8{}, ColumnToImage{});
                }
            }
            else if(num_dim_spatial == 3)
            {
                if(data_type == DataType::F32_F32)
                {
                    return profile(I3, GNDHWC{}, F32{}, F32{}, ColumnToImage{});
                }
                else if(data_type == DataType::F16_F16)
                {
                    return profile(I3, GNDHWC{}, F16{}, F16{}, ColumnToImage{});
                }
                else if(data_type == DataType::BF16_BF16)
                {
                    return profile(I3, GNDHWC{}, BF16{}, BF16{}, ColumnToImage{});
                }
                else if(data_type == DataType::INT8_INT8)
                {
                    return profile(I3, GNDHWC{}, INT8{}, INT8{}, ColumnToImage{});
                }
            }
        }
    }

    std::cout << "this data_type & layout is not implemented" << std::endl;
    return 1;
}

REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_tensor_rearrange);
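The ImageToColumn{} / ColumnToImage{} arguments passed to profile above are empty tag objects: only their type matters, and the lambda recovers it via decltype. A minimal standalone sketch of this tag-dispatch idiom (the types and function below are illustrative stand-ins, not CK's actual definitions):

#include <iostream>
#include <type_traits>

// Illustrative stand-ins for the ck::conv_tensor_rearrange_op tag types.
struct ImageToColumn {};
struct ColumnToImage {};

// The callee takes the tag by value and recovers the operation at compile
// time, mirroring how the profile lambda turns rearrange_op_type into the
// Op template parameter via decltype.
template <typename Op>
void run(Op)
{
    if constexpr(std::is_same_v<Op, ImageToColumn>)
        std::cout << "rearrange the image into GEMM columns\n";
    else
        std::cout << "scatter GEMM columns back into the image\n";
}

int main()
{
    run(ImageToColumn{}); // mirrors profile(..., ImageToColumn{})
    run(ColumnToImage{}); // mirrors profile(..., ColumnToImage{})
    return 0;
}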
@@ -23,6 +23,7 @@ enum struct GemmDataType
    F16_F16_F16,    // 1
    BF16_BF16_BF16, // 2
    INT8_INT8_INT8, // 3
    F8_F8_F8,       // 4
};

#define OP_NAME "gemm"
@@ -31,7 +32,7 @@ enum struct GemmDataType
static void print_helper_msg()
{
    std::cout << "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
              << "arg2: data type (0: fp32; 1: fp16; 2: bf16; 3: int8; 4: fp8)\n"
              << "arg3: matrix layout (0: A[m, k] * B[k, n] = C[m, n];\n"
              << "                     1: A[m, k] * B[n, k] = C[m, n];\n"
              << "                     2: A[k, m] * B[k, n] = C[m, n];\n"
@@ -76,6 +77,9 @@ int profile_gemm(int argc, char* argv[])
    using INT8  = int8_t;
    using INT32 = int32_t;
#endif
#ifdef CK_ENABLE_FP8
    using F8 = ck::f8_t;
#endif

    using Row = ck::tensor_layout::gemm::RowMajor;
    using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -194,6 +198,24 @@ int profile_gemm(int argc, char* argv[])
    {
        return profile(Col{}, Col{}, Row{}, INT8{}, INT8{}, INT32{}, INT8{});
    }
#endif
#ifdef CK_ENABLE_FP8
    else if(data_type == GemmDataType::F8_F8_F8 && layout == GemmMatrixLayout::MK_KN_MN)
    {
        return profile(Row{}, Row{}, Row{}, F8{}, F8{}, F32{}, F8{});
    }
    else if(data_type == GemmDataType::F8_F8_F8 && layout == GemmMatrixLayout::MK_NK_MN)
    {
        return profile(Row{}, Col{}, Row{}, F8{}, F8{}, F32{}, F8{});
    }
    else if(data_type == GemmDataType::F8_F8_F8 && layout == GemmMatrixLayout::KM_KN_MN)
    {
        return profile(Col{}, Row{}, Row{}, F8{}, F8{}, F32{}, F8{});
    }
    else if(data_type == GemmDataType::F8_F8_F8 && layout == GemmMatrixLayout::KM_NK_MN)
    {
        return profile(Col{}, Col{}, Row{}, F8{}, F8{}, F32{}, F8{});
    }
#endif
    else
    {
...
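Each new fp8 branch passes F32 in the slot that the int8 branches fill with INT32, i.e. the accumulation type apparently stays wider than the storage type. A small self-contained sketch of how arg2/arg3 select these branches (the enumerator order mirrors the helper text above; the hard-coded values stand in for the profiler's real argv parsing):

#include <iostream>

// Enumerator order mirrors the helper message: arg2 4 == fp8, arg3 0..3 == layouts.
enum struct GemmDataType { F32_F32_F32, F16_F16_F16, BF16_BF16_BF16, INT8_INT8_INT8, F8_F8_F8 };
enum struct GemmMatrixLayout { MK_KN_MN, MK_NK_MN, KM_KN_MN, KM_NK_MN };

int main()
{
    // Stand-ins for static_cast<...>(std::stoi(argv[...])) with "4" and "0".
    const auto data_type = static_cast<GemmDataType>(4);
    const auto layout    = static_cast<GemmMatrixLayout>(0);

    if(data_type == GemmDataType::F8_F8_F8 && layout == GemmMatrixLayout::MK_KN_MN)
    {
        // Corresponds to profile(Row{}, Row{}, Row{}, F8{}, F8{}, F32{}, F8{}).
        std::cout << "fp8 GEMM, A row-major, B row-major, C row-major\n";
    }
    return 0;
}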
@@ -155,7 +155,7 @@ add_subdirectory(contraction)
add_subdirectory(pool)
add_subdirectory(batched_gemm_multi_d)
add_subdirectory(grouped_convnd_bwd_data)
add_subdirectory(conv_tensor_rearrange)
if(GPU_TARGETS MATCHES "gfx11")
    add_subdirectory(wmma_op)
endif()
@@ -70,10 +70,23 @@ class TestBatchNormBwdRank4 : public ::testing::Test
    }
};

using KernelTypes = ::testing::Types<
#ifdef CK_ENABLE_FP16
    std::tuple<F16, F32, F32, F32, F16, F32, F32>
#endif
#ifdef CK_ENABLE_FP32
    ,
    std::tuple<F32, F32, F32, F32, F32, F32, F32>
#endif
#ifdef CK_ENABLE_BF16
    ,
    std::tuple<BF16, F32, F32, F32, BF16, F32, F32>
#endif
#ifdef CK_ENABLE_FP64
    ,
    std::tuple<F64, F64, F64, F64, F64, F64, F64>
#endif
    >;

TYPED_TEST_SUITE(TestBatchNormBwdRank4, KernelTypes);
...
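Here (and in the forward and inference batch-norm test files below), each separating comma is emitted inside the #ifdef of the entry it precedes, so disabling a precision removes both the tuple and its comma; note the pattern still assumes the first guarded entry (FP16) is enabled. A self-contained sketch of the same trick with illustrative macros:

#include <tuple>

#define HAS_INT 1 // like CK_ENABLE_FP16 above, the first entry's flag must be on
// #define HAS_DOUBLE 1 // when this is off, both the comma and the entry vanish

using Enabled = std::tuple<
#ifdef HAS_INT
    int
#endif
#ifdef HAS_DOUBLE
    ,
    double
#endif
    >;

static_assert(std::tuple_size_v<Enabled> == 1, "only enabled entries remain");

int main() { return 0; }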
@@ -87,10 +87,23 @@ class TestBatchNormFwdRank4 : public ::testing::Test
    }
};

using KernelTypes = ::testing::Types<
#ifdef CK_ENABLE_FP16
    std::tuple<F16, F16, F32, F16, F16, F32>
#endif
#ifdef CK_ENABLE_FP32
    ,
    std::tuple<F32, F32, F32, F32, F32, F32>
#endif
#ifdef CK_ENABLE_BF16
    ,
    std::tuple<BF16, BF16, F32, BF16, BF16, F32>
#endif
#ifdef CK_ENABLE_FP64
    ,
    std::tuple<F64, F64, F64, F64, F64, F64>
#endif
    >;

TYPED_TEST_SUITE(TestBatchNormFwdRank4, KernelTypes);
...
@@ -67,10 +67,23 @@ class TestBatchNormInferRank4 : public ::testing::Test
    }
};

using KernelTypes = ::testing::Types<
#ifdef CK_ENABLE_FP16
    std::tuple<F16, F16, F32, F16, F16, F32>
#endif
#ifdef CK_ENABLE_FP32
    ,
    std::tuple<F32, F32, F32, F32, F32, F32>
#endif
#ifdef CK_ENABLE_BF16
    ,
    std::tuple<BF16, BF16, F32, BF16, BF16, F32>
#endif
#ifdef CK_ENABLE_FP64
    ,
    std::tuple<F64, F64, F64, F64, F64, F64>
#endif
    >;

TYPED_TEST_SUITE(TestBatchNormInferRank4, KernelTypes);
...
add_gtest_executable(test_conv_tensor_rearrange test_conv_tensor_rearrange.cpp)
target_link_libraries(test_conv_tensor_rearrange PRIVATE utility device_image_to_column_instance device_column_to_image_instance)
add_gtest_executable(test_conv_tensor_rearrange_interface test_conv_tensor_rearrange_interface.cpp)
target_link_libraries(test_conv_tensor_rearrange_interface PRIVATE utility)
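Note that test_conv_tensor_rearrange links both device_image_to_column_instance and device_column_to_image_instance, since its typed cases exercise both rearrange directions, while test_conv_tensor_rearrange_interface instantiates its device classes directly and therefore needs only utility.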
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
@@ -9,29 +9,29 @@
#include <gtest/gtest.h>

#include "profiler/profile_conv_tensor_rearrange_impl.hpp"

template <typename Tuple>
class TestConvTensorRearrange : public ::testing::Test
{
    protected:
    using ImLayout              = std::tuple_element_t<0, Tuple>;
    using ConvTensorRearrangeOp = std::tuple_element_t<1, Tuple>;

    std::vector<ck::utils::conv::ConvParam> conv_params;

    template <ck::index_t NDimSpatial, typename InDataType, typename OutDataType>
    void Run()
    {
        EXPECT_FALSE(conv_params.empty());
        bool pass = true;
        for(auto& param : conv_params)
        {
            pass = pass && ck::profiler::profile_conv_tensor_rearrange_impl<NDimSpatial,
                                                                            ImLayout,
                                                                            InDataType,
                                                                            OutDataType,
                                                                            ConvTensorRearrangeOp>(
                               true,  // do_verification
                               1,     // init_method: integer value
                               false, // do_log
@@ -43,48 +43,43 @@ class TestConvTensorRearrange : public ::testing::Test
};

using namespace ck::tensor_layout::convolution;
using namespace ck::conv_tensor_rearrange_op;

using KernelTypes1d =
    ::testing::Types<std::tuple<GNWC, ImageToColumn>, std::tuple<GNWC, ColumnToImage>>;

using KernelTypes2d =
    ::testing::Types<std::tuple<GNHWC, ImageToColumn>, std::tuple<GNHWC, ColumnToImage>>;

using KernelTypes3d =
    ::testing::Types<std::tuple<GNDHWC, ImageToColumn>, std::tuple<GNDHWC, ColumnToImage>>;

template <typename Tuple>
class TestConvTensorRearrange1d : public TestConvTensorRearrange<Tuple>
{
};

template <typename Tuple>
class TestConvTensorRearrange2d : public TestConvTensorRearrange<Tuple>
{
};

template <typename Tuple>
class TestConvTensorRearrange3d : public TestConvTensorRearrange<Tuple>
{
};

TYPED_TEST_SUITE(TestConvTensorRearrange1d, KernelTypes1d);
TYPED_TEST_SUITE(TestConvTensorRearrange2d, KernelTypes2d);
TYPED_TEST_SUITE(TestConvTensorRearrange3d, KernelTypes3d);

TYPED_TEST(TestConvTensorRearrange1d, Test1D)
{
    this->conv_params.clear();
    this->conv_params.push_back({1, 1, 4, 1, 192, {3}, {28}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 1, 64, 1, 64, {3}, {14}, {1}, {1}, {1}, {1}});
    this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {7}, {3}, {1}, {0}, {0}});
    this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {3}, {1}, {1}, {0}, {0}});
    // ScalarPerVector should be 1
    this->conv_params.push_back({1, 1, 4, 1, 1, {3}, {28}, {1}, {1}, {1}, {1}});
@@ -92,10 +87,21 @@ TYPED_TEST(TestImageToColumn1d, Test1D)
    this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {2}, {1}, {1}, {1}});
    // dilation != 1
    this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {1}, {2}, {1}, {1}});
#ifdef CK_ENABLE_FP32
    this->template Run<1, float, float>();
#endif
#ifdef CK_ENABLE_BF16
    this->template Run<1, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
    this->template Run<1, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
    this->template Run<1, int8_t, int8_t>();
#endif
}

TYPED_TEST(TestConvTensorRearrange2d, Test2D)
{
    this->conv_params.clear();
@@ -103,19 +109,45 @@ TYPED_TEST(TestImageToColumn2d, Test2D)
        {2, 1, 4, 1, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back(
        {2, 1, 64, 1, 64, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
    this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {7, 7}, {3, 3}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
    this->conv_params.push_back(
        {2, 1, 64, 1, 64, {3, 3}, {28, 28}, {2, 2}, {2, 2}, {1, 1}, {1, 1}});
#ifdef CK_ENABLE_FP32
    this->template Run<2, float, float>();
#endif
#ifdef CK_ENABLE_BF16
    this->template Run<2, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
    this->template Run<2, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
    this->template Run<2, int8_t, int8_t>();
#endif
}

TYPED_TEST(TestConvTensorRearrange3d, Test3D)
{
    this->conv_params.clear();
    this->conv_params.push_back(
        {3, 1, 16, 1, 64, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {3, 3, 3}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 1, 2, 1, 64, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
    this->conv_params.push_back(
        {3, 1, 32, 1, 64, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
    this->conv_params.push_back(
        {3, 1, 64, 1, 64, {3, 3, 3}, {14, 14, 14}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}});
#ifdef CK_ENABLE_FP32
    this->template Run<3, float, float>();
#endif
#ifdef CK_ENABLE_BF16
    this->template Run<3, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
    this->template Run<3, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
    this->template Run<3, int8_t, int8_t>();
#endif
}
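Each brace-initialized parameter list above reads, in order: num_dim_spatial, G, N, K, C, filter spatial lengths, input spatial lengths, conv strides, dilations, left pads, right pads. This ordering is inferred from the stride/dilation/C comments in these tests and from the G/N/K/C wording in the profiler's argc check, not quoted from ConvParam's declaration.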
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

#include <cstdlib>
#include <iostream>
@@ -10,6 +10,8 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp"
#include "ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp"

#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
@@ -18,28 +20,37 @@
#include <gtest/gtest.h>

using DataType = float;
using ImLayout = ck::tensor_layout::convolution::GNWC;

template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

using namespace ck::conv_tensor_rearrange_op;

template <ck::index_t ScalarPerVector, bool IsCPacked>
class TestConvTensorRearrangeInterface : public ::testing::Test
{
    protected:
    static constexpr ck::index_t NDimSpatial = 1;

    // clang-format off
    using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
        //       Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer|   Thread|          Scalar|
        //       Dim|         |           |            |  Size| Block| Block|  Cluster|             Per|
        //   Spatial|         |           |            |      |      |      |  Lengths|          Vector|
        //          |         |           |            |      |      |      |         |                |
        < NDimSpatial, ImLayout,  DataType,    DataType,   256,  128,  128, S<16, 16>, ScalarPerVector>;
    using DeviceColToimgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
        //       Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer|   Thread|          Scalar|
        //       Dim|         |           |            |  Size| Block| Block|  Cluster|             Per|
        //   Spatial|         |           |            |      |      |      |  Lengths|          Vector|
        //          |         |           |            |      |      |      |         |                |
        < NDimSpatial, ImLayout,  DataType,    DataType,   256,  128,  128, S<16, 16>, ScalarPerVector>;
    // clang-format on

    ck::utils::conv::ConvParam conv_param;

    template <typename ConvTensorRearrangeOp>
    bool Run()
    {
@@ -57,10 +68,10 @@ class TestImageToColumnInterface : public ::testing::Test
            ck::accumulate_n<ck::index_t>(
                conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());

        const auto image_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<ImLayout>(
                conv_param);
        const auto gemm_desc = HostTensorDescriptor({NDoHoWo, CZYX});

        std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
        std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
@@ -77,120 +88,173 @@ class TestImageToColumnInterface : public ::testing::Test
        copy(conv_param.input_spatial_lengths_, input_spatial_lengths);
        copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
        copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
        copy(image_desc.GetStrides(), input_g_n_c_wis_strides);
        copy(gemm_desc.GetStrides(), output_m_k_strides);
        copy(conv_param.conv_filter_strides_, conv_filter_strides);
        copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
        copy(conv_param.input_left_pads_, input_left_pads);
        copy(conv_param.input_right_pads_, input_right_pads);

        if constexpr(std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
        {
            auto img2col  = DeviceImgToColInstance{};
            auto argument = img2col.MakeArgument(nullptr,
                                                 nullptr,
                                                 N,
                                                 IsCPacked ? C : FakeC,
                                                 input_spatial_lengths,
                                                 filter_spatial_lengths,
                                                 output_spatial_lengths,
                                                 input_g_n_c_wis_strides,
                                                 output_m_k_strides,
                                                 conv_filter_strides,
                                                 conv_filter_dilations,
                                                 input_left_pads,
                                                 input_right_pads);

            return img2col.IsSupportedArgument(argument);
        }
        else if constexpr(std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
        {
            auto col2img  = DeviceColToimgInstance{};
            auto argument = col2img.MakeArgument(nullptr,
                                                 nullptr,
                                                 N,
                                                 IsCPacked ? C : FakeC,
                                                 input_spatial_lengths,
                                                 filter_spatial_lengths,
                                                 output_spatial_lengths,
                                                 input_g_n_c_wis_strides,
                                                 output_m_k_strides,
                                                 conv_filter_strides,
                                                 conv_filter_dilations,
                                                 input_left_pads,
                                                 input_right_pads);

            return col2img.IsSupportedArgument(argument);
        }
    }
};

class TestConvTensorRearrangeInterface1ScalarPerVector
    : public TestConvTensorRearrangeInterface<1, true>
{
};

class TestConvTensorRearrangeInterface4ScalarPerVector
    : public TestConvTensorRearrangeInterface<4, true>
{
};

class TestConvTensorRearrangeInterface4ScalarPerVectorFakeC
    : public TestConvTensorRearrangeInterface<4, false>
{
};

TEST_F(TestConvTensorRearrangeInterface1ScalarPerVector, X1ScalarPerVector)
{
    // vector load C * X % ScalarPerVector
    this->conv_param  = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
    bool is_supported = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // vector load C * left_pad_x % ScalarPerVector
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // vector load C * right_pad_x % ScalarPerVector
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // vector load C % ScalarPerVector, right_pad and stride
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // vector load C % ScalarPerVector, left_pad and stride
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // vector load C % ScalarPerVector, dilation
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
    // C = 4
    this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
}

TEST_F(TestConvTensorRearrangeInterface4ScalarPerVector, X4ScalarPerVector)
{
    // vector load C * X % ScalarPerVector
    this->conv_param  = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
    bool is_supported = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // vector load C * left_pad_x % ScalarPerVector
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // vector load C * right_pad_x % ScalarPerVector
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // vector load C % ScalarPerVector, right_pad and stride
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // vector load C % ScalarPerVector, left_pad and stride
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // vector load C % ScalarPerVector, dilation
    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // C = 4
    this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
}

TEST_F(TestConvTensorRearrangeInterface4ScalarPerVectorFakeC, X4ScalarPerVectorFakeC)
{
    // C = 3
    this->conv_param  = {1, 1, 1, 1, 3, {4}, {3}, {1}, {1}, {0}, {0}};
    bool is_supported = this->template Run<ImageToColumn>();
    EXPECT_FALSE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_FALSE(is_supported);
    // C = 8
    this->conv_param = {1, 1, 1, 1, 8, {4}, {3}, {1}, {1}, {0}, {0}};
    is_supported     = this->template Run<ImageToColumn>();
    EXPECT_TRUE(is_supported);
    is_supported = this->template Run<ColumnToImage>();
    EXPECT_TRUE(is_supported);
}
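Both branches of Run() above pass nullptr for the input and output device buffers: the argument is fed only to IsSupportedArgument, so these interface tests exercise the vector-load support checks without allocating device memory or launching a kernel.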
add_gtest_executable(test_grouped_convnd_fwd test_grouped_convnd_fwd.cpp)
target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
class TestGroupedConvNdFwd : public ::testing::Test
{
protected:
std::vector<ck::utils::conv::ConvParam> conv_params;
};
// 1d GNWC/GKXC/GNWK
TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
{
conv_params.clear();
conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 2d GNHWC/GKYXC/GNHWK
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
{
conv_params.clear();
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 3d GNDHWC/GKZYXC/GNDHWK
TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
{
conv_params.clear();
conv_params.push_back(
{3, 2, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 2d NHWGC/KYXGC/NHWGK
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
{
conv_params.clear();
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
for(auto& param : conv_params)
{
bool pass;
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWGC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::NHWGK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
protected:
using DataType = std::tuple_element_t<0, Tuple>;
using InLayout = std::tuple_element_t<1, Tuple>;
using WeiLayout = std::tuple_element_t<2, Tuple>;
using OutLayout = std::tuple_element_t<3, Tuple>;
std::vector<ck::utils::conv::ConvParam> conv_params;
template <ck::index_t NDimSpatial>
void Run()
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
{
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
}
EXPECT_TRUE(pass);
}
};
using namespace ck::tensor_layout::convolution;
using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK>,
std::tuple<ck::half_t, GNWC, GKXC, GNWK>,
std::tuple<ck::bhalf_t, GNWC, GKXC, GNWK>,
std::tuple<int8_t, GNWC, GKXC, GNWK>>;
using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK>,
std::tuple<ck::half_t, GNHWC, GKYXC, GNHWK>,
std::tuple<ck::bhalf_t, GNHWC, GKYXC, GNHWK>,
std::tuple<int8_t, GNHWC, GKYXC, GNHWK>,
std::tuple<float, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>,
std::tuple<int8_t, NHWGC, GKYXC, NHWGK>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>,
std::tuple<ck::half_t, GNDHWC, GKZYXC, GNDHWK>,
std::tuple<ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK>,
std::tuple<int8_t, GNDHWC, GKZYXC, GNDHWK>,
std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>,
std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>;
template <typename Tuple>
class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple>
{
};
template <typename Tuple>
class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
{
};
template <typename Tuple>
class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
{
};
TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d);
TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
TYPED_TEST(TestGroupedConvndFwd1d, Test1D)
{
this->conv_params.clear();
this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
this->conv_params.push_back({1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
this->conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
this->template Run<1>();
}
TYPED_TEST(TestGroupedConvndFwd2d, Test2D)
{
this->conv_params.clear();
this->conv_params.push_back(
{2, 2, 32, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
this->conv_params.push_back(
{2, 2, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back(
{2, 2, 32, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
this->conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->template Run<2>();
}
TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{
this->conv_params.clear();
this->conv_params.push_back(
{3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->conv_params.push_back(
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->conv_params.push_back(
{3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
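The typed-test plumbing above is the standard GoogleTest pattern; a minimal self-contained sketch with placeholder types (not the conv fixtures or CK types):

#include <tuple>
#include <gtest/gtest.h>

// Placeholder fixture mirroring TestGroupedConvndFwd's shape: each test case
// is a std::tuple whose elements the fixture unpacks.
template <typename Tuple>
class MiniTypedTest : public ::testing::Test
{
    protected:
    using First = std::tuple_element_t<0, Tuple>;
};

using MiniTypes = ::testing::Types<std::tuple<int, float>, std::tuple<double, char>>;
TYPED_TEST_SUITE(MiniTypedTest, MiniTypes);

// Runs once per entry in MiniTypes, just like Test1D/Test2D/Test3D above.
TYPED_TEST(MiniTypedTest, FirstElementIsDefaultConstructible)
{
    typename TestFixture::First value{};
    (void)value;
    SUCCEED();
}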
add_gtest_executable(test_image_to_column test_image_to_column.cpp)
target_link_libraries(test_image_to_column PRIVATE utility device_image_to_column_instance)
add_gtest_executable(test_image_to_column_interface test_image_to_column_interface.cpp)
target_link_libraries(test_image_to_column_interface PRIVATE utility)