Commit b93575ca authored by Jing Zhang

merge develop

parents 54df59bf c8a8385f
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
@@ -12,3 +13,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
set(target 1)
endif()
endforeach()
+endif()
@@ -108,7 +108,7 @@ TEST_F(TestGGemmSplitKInterface_MKNKMN, KLoops)
// kloops % 2
Ks = std::vector<int>{256, 512, 320, 768};
-EXPECT_FALSE(
+EXPECT_TRUE(
    DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, kbatch));
// Not all gemms have same value for main_k0_block_loop!
...
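For reference, a minimal sketch (not CK's implementation) of the constraint the comment above alludes to: under split-K, each gemm's K is first divided by kbatch, and the interface rejects a group whose gemms disagree on whether the main K0 block loop runs more than once. K0PerBlock and K1 below are hypothetical tile sizes for illustration only.

#include <cstddef>
#include <vector>

constexpr int K0PerBlock = 4; // hypothetical tile size
constexpr int K1         = 8; // hypothetical tile size

// Returns true only if every gemm in the group lands on the same
// main_k0_block_loop decision for the given kbatch.
bool gemms_agree_on_main_k0_loop(const std::vector<int>& Ks, int kbatch)
{
    bool first = false;
    for(std::size_t i = 0; i < Ks.size(); ++i)
    {
        const int k_per_batch    = Ks[i] / kbatch;
        const bool has_main_loop = k_per_batch / (K0PerBlock * K1) > 1;
        if(i == 0)
            first = has_main_loop;
        else if(has_main_loop != first)
            return false; // mixed values -> group rejected
    }
    return true;
}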
@@ -147,14 +147,14 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
32,
4,
2,
-S<1, 4, 32, 1>,
+S<1, 4, 16, 1>,
ABlockTransferThreadClusterArrageOrder,
ABlockTransferSrcAccessOrder,
ABlockTransferSrcVectorDim::value,
ABlockTransferSrcScalarPerVector,
ABlockTransferDstScalarPerVector_K1::value,
ABlockLdsAddExtraM::value,
-S<1, 4, 32, 1>,
+S<1, 4, 16, 1>,
BBlockTransferThreadClusterArrageOrder,
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim::value,
...
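Both S<...> changes shrink the thread clusters of the A and B block transfers from 128 threads (1*4*32*1) to 64 (1*4*16*1). A hedged compile-time check of the usual invariant that the cluster lengths must tile the whole thread block; BlockSize = 64 is an assumption here, not a value read from this instance.

#include <cstddef>

// Product of the thread cluster lengths, e.g. cluster_product<1, 4, 16, 1> == 64.
template <std::size_t... Ls>
inline constexpr std::size_t cluster_product = (std::size_t{1} * ... * Ls);

constexpr std::size_t BlockSize = 64; // assumed block size for this wrapper

static_assert(cluster_product<1, 4, 16, 1> == BlockSize,
              "cluster lengths must cover the whole thread block");
static_assert(cluster_product<1, 4, 32, 1> == 2 * BlockSize,
              "the old cluster tiled twice as many threads");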
-add_custom_target(test_normalization)
-add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
-add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
-target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
-add_dependencies(test_normalization test_layernorm2d_fp32)
-add_dependencies(test_normalization test_layernorm2d_fp16)
-add_dependencies(test_normalization test_groupnorm_fp16)
-add_dependencies(test_normalization test_groupnorm_fp32)
+if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+add_custom_target(test_normalization)
+endif()
+if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
+add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
+target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
+add_dependencies(test_normalization test_layernorm2d_fp32)
+add_dependencies(test_normalization test_groupnorm_fp32)
+endif()
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
+add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
+add_dependencies(test_normalization test_layernorm2d_fp16)
+add_dependencies(test_normalization test_groupnorm_fp16)
+endif()
add_custom_target(test_pool_fwd)
-add_gtest_executable(test_avg_pool2d_fwd test_avg_pool2d_fwd.cpp)
add_gtest_executable(test_avg_pool3d_fwd test_avg_pool3d_fwd.cpp)
-add_gtest_executable(test_max_pool2d_fwd test_max_pool2d_fwd.cpp)
add_gtest_executable(test_max_pool3d_fwd test_max_pool3d_fwd.cpp)
-target_link_libraries(test_avg_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
+target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
+target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
-add_dependencies(test_pool_fwd test_avg_pool2d_fwd)
add_dependencies(test_pool_fwd test_avg_pool3d_fwd)
-add_dependencies(test_pool_fwd test_max_pool2d_fwd)
add_dependencies(test_pool_fwd test_max_pool3d_fwd)
deleted file: test_avg_pool2d_fwd.cpp (dropped from CMakeLists above)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
template <typename Tuple>
class TestAvgPool2dFwd : public ::testing::Test
{
protected:
using InDataType = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>;
using ComputeDataType = std::tuple_element_t<2, Tuple>;
using IndexDataType = std::tuple_element_t<3, Tuple>;
std::vector<PoolingParam> params;
void Run()
{
for(auto param : params)
{
bool success =
ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::AVG,
false,
false>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
using KernelTypes =
::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
TYPED_TEST_SUITE(TestAvgPool2dFwd, KernelTypes);
TYPED_TEST(TestAvgPool2dFwd, Test_Pool)
{
// length, window_length, window_stride, left_pad, right_pad
this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
{{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
{{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
this->Run();
}
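The expected output extents for the cases above follow from standard pooling arithmetic; a small standalone check (not part of the deleted test) for the padded 30x30 case and the window-covers-input 64x64 case:

#include <iostream>

// out = (in + left_pad + right_pad - window) / stride + 1
int pooled_length(int in, int window, int stride, int lpad, int rpad)
{
    return (in + lpad + rpad - window) / stride + 1;
}

int main()
{
    std::cout << pooled_length(30, 2, 2, 1, 1) << '\n';  // 16
    std::cout << pooled_length(64, 64, 1, 0, 0) << '\n'; // 1 (window covers the input)
}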
@@ -25,6 +25,8 @@ class TestAvgPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::AVG,
false,
false>(true,
@@ -34,23 +36,27 @@ class TestAvgPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
+#ifdef CK_ENABLE_FP16
using KernelTypes =
    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif
TYPED_TEST_SUITE(TestAvgPool3dFwd, KernelTypes);
TYPED_TEST(TestAvgPool3dFwd, Test_Pool)
{
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
this->Run();
}
deleted file: test_max_pool2d_fwd.cpp (dropped from CMakeLists above)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
template <typename Tuple>
class TestMaxPool2dFwd : public ::testing::Test
{
protected:
using InDataType = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>;
using ComputeDataType = std::tuple_element_t<2, Tuple>;
using IndexDataType = std::tuple_element_t<3, Tuple>;
std::vector<PoolingParam> params;
void Run()
{
for(auto param : params)
{
// max pool
bool success =
ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::MAX,
false,
false>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
// max pool + index
success = ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::MAX,
false,
true>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
using KernelTypes =
::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
TYPED_TEST_SUITE(TestMaxPool2dFwd, KernelTypes);
TYPED_TEST(TestMaxPool2dFwd, Test_Pool)
{
// length, window_length, window_stride, left_pad, right_pad
this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
{{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
{{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
this->Run();
}
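The second profiler call above flips the last template flag so the kernel also emits argmax indices. A scalar sketch (illustrative, not the profiler's verification) of that value/index contract:

#include <iostream>
#include <utility>
#include <vector>

// Max pooling over one window, returning both the max and its flat index.
std::pair<float, int> max_with_index(const std::vector<float>& window)
{
    int best = 0;
    for(int i = 1; i < static_cast<int>(window.size()); ++i)
        if(window[i] > window[best])
            best = i;
    return {window[best], best};
}

int main()
{
    auto [v, i] = max_with_index({0.5f, 2.0f, -1.0f, 2.0f});
    std::cout << v << " at index " << i << '\n'; // 2 at index 1 (first max kept)
}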
@@ -26,6 +26,8 @@ class TestMaxPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::MAX,
false,
false>(true,
@@ -35,6 +37,7 @@ class TestMaxPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
@@ -44,6 +47,8 @@ class TestMaxPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::MAX,
false,
true>(true,
@@ -53,6 +58,7 @@ class TestMaxPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
@@ -60,16 +66,21 @@ class TestMaxPool3dFwd : public ::testing::Test
}
};
+#ifdef CK_ENABLE_FP16
using KernelTypes =
-    ::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
+    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif
TYPED_TEST_SUITE(TestMaxPool3dFwd, KernelTypes);
TYPED_TEST(TestMaxPool3dFwd, Test_Pool)
{
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
this->Run();
}
@@ -14,11 +14,13 @@ struct PoolingParam
PoolingParam(const std::vector<index_t>& length,
             const std::vector<index_t>& window_spatial_lengths,
             const std::vector<index_t>& window_strides,
+            const std::vector<index_t>& window_dilations,
             const std::vector<index_t>& input_left_pads,
             const std::vector<index_t>& input_right_pads)
    : length_(length),
      window_spatial_lengths_(window_spatial_lengths),
      window_strides_(window_strides),
+      window_dilations_(window_dilations),
      input_left_pads_(input_left_pads),
      input_right_pads_(input_right_pads)
{
@@ -26,6 +28,7 @@ struct PoolingParam
std::vector<index_t> length_;
std::vector<index_t> window_spatial_lengths_;
std::vector<index_t> window_strides_;
+std::vector<index_t> window_dilations_;
std::vector<index_t> input_left_pads_;
std::vector<index_t> input_right_pads_;
};
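To make the new window_dilations_ field concrete: dilation stretches the pooling window without adding taps, so the effective extent is (window - 1) * dilation + 1. A small worked check matching the dilated 3-D case added above (window 4, stride 4, dilation 2 on length-64 axes, no padding):

#include <iostream>

int dilated_extent(int window, int dilation)
{
    return (window - 1) * dilation + 1;
}

// out = (in + pads - effective_extent) / stride + 1
int pooled_length(int in, int window, int stride, int dilation, int lpad, int rpad)
{
    return (in + lpad + rpad - dilated_extent(window, dilation)) / stride + 1;
}

int main()
{
    std::cout << dilated_extent(4, 2) << '\n';             // 7
    std::cout << pooled_length(64, 4, 4, 2, 0, 0) << '\n'; // 15
}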
@@ -10,8 +10,9 @@
template <ck::index_t N>
using I = ck::Number<N>;
+#ifdef CK_ENABLE_FP16
using F16 = ck::half_t;
+#endif
using F32 = float;
template <typename Tuple>
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
+#ifdef CK_ENABLE_FP16
    std::tuple< F16, F32, F16, I<3>>,
+#endif
    std::tuple< F32, F32, F32, I<3>>
    >;
// clang-format on
...
@@ -10,8 +10,9 @@
template <ck::index_t N>
using I = ck::Number<N>;
+#ifdef CK_ENABLE_FP16
using F16 = ck::half_t;
+#endif
using F32 = float;
template <typename Tuple>
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
+#ifdef CK_ENABLE_FP16
    std::tuple< F16, F32, F16, I<4>>,
+#endif
    std::tuple< F32, F32, F32, I<4>>
    >;
// clang-format on
...