Commit ebb5522c authored by Mateusz Ozga, committed by root

Apply cshuffle to bwd_weight_cshuffle operator

parent fdfe2102
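
A minimal sketch of how the instance factories added in this commit are typically consumed (the include path mirrors the one used in this diff; the helper name get_pipev5_instances and the exact wiring are illustrative assumptions, not part of the commit):

#include <memory>
#include <vector>

#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp"

namespace ctc     = ck::tensor_layout::convolution;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;

// Matches the DeviceGroupedConvBwdWeight signature used by the new factories:
// 3 spatial dims, NDHWGC/GKZYXC/NDHWGK layouts, f32 in/wei/out, PassThrough ops.
using Op = ck::tensor_operation::device::DeviceGroupedConvBwdWeight<3,
                                                                    ctc::NDHWGC,
                                                                    ctc::GKZYXC,
                                                                    ctc::NDHWGK,
                                                                    float,
                                                                    float,
                                                                    float,
                                                                    PassThrough,
                                                                    PassThrough,
                                                                    PassThrough>;

std::vector<std::unique_ptr<Op>> get_pipev5_instances()
{
    std::vector<std::unique_ptr<Op>> instances;
    ck::tensor_operation::device::instance::
        add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instances(
            instances);
    return instances; // callers then probe each instance via IsSupportedArgument(...)
}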
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, di, hi, wi, g, c] * wei[g, k, z, y, x, c] = out[n, do, ho, wo, g, k]
void add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
                                                           NDHWGC,
                                                           GKZYXC,
                                                           NDHWGK,
                                                           F32,
                                                           F32,
                                                           F32,
                                                           PassThrough,
                                                           PassThrough,
                                                           PassThrough>>>& instances)
{
    add_device_operation_instances(instances,
                                   device_grouped_conv_bwd_weight_xdl_c_shuffle_f32_instances<
                                       3,
                                       NDHWGC,
                                       GKZYXC,
                                       NDHWGK,
                                       ConvBwdWeightDefault,
                                       BlockGemmPipelineScheduler::Intrawave,
                                       BlockGemmPipelineVersion::v5>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
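
For orientation, the problem these instances solve is the convolution weight gradient named in the comment above. A naive, hypothetical reference lowered to 1D (plain C++, not CK code; flat-vector indexing follows the NWGC/GKXC/NWGK convention implied by the layouts above):

#include <vector>

// Naive 1D sketch: wei[g][k][x][c] = sum over n, wo of
// out[n][wo][g][k] * in[n][wi][g][c], with wi = wo*stride + x*dilation - pad_left.
void reference_conv1d_bwd_weight(const std::vector<float>& in,  // [N][Wi][G][C]
                                 std::vector<float>& wei,       // [G][K][X][C]
                                 const std::vector<float>& out, // [N][Wo][G][K]
                                 int N, int Wi, int Wo, int G, int C, int K, int X,
                                 int stride, int dilation, int pad_left)
{
    for(int g = 0; g < G; ++g)
        for(int k = 0; k < K; ++k)
            for(int x = 0; x < X; ++x)
                for(int c = 0; c < C; ++c)
                {
                    float acc = 0.f;
                    for(int n = 0; n < N; ++n)
                        for(int wo = 0; wo < Wo; ++wo)
                        {
                            const int wi = wo * stride + x * dilation - pad_left;
                            if(wi < 0 || wi >= Wi)
                                continue; // padded region contributes nothing
                            acc += out[((n * Wo + wo) * G + g) * K + k] *
                                   in[((n * Wi + wi) * G + g) * C + c];
                        }
                    wei[((g * K + k) * X + x) * C + c] = acc;
                }
}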
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
 #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
 #include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp"
@@ -10,7 +10,7 @@ namespace device {
 namespace instance {
 // Compilation parameters for in[n, hi, wi, g, c] * wei[g, k, y, x, c] = out[n, ho, wo, g, k]
-void add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances(
+void add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instances(
     std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
                                                            NDHWGC,
                                                            GKZYXC,
@@ -22,22 +22,15 @@ void add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances
                                                            PassThrough,
                                                            PassThrough>>>& instances)
 {
-    // 1. Default
-    add_device_operation_instances(
-        instances,
-        device_grouped_conv_bwd_weight_xdl_c_shuffle_f32_instances<3,
-                                                                   NDHWGC,
-                                                                   GKZYXC,
-                                                                   NDHWGK,
-                                                                   ConvBwdWeightDefault>{});
-    // 2. Filter1x1Stride1Pad0
     add_device_operation_instances(instances,
                                    device_grouped_conv_bwd_weight_xdl_c_shuffle_f32_instances<
                                        3,
                                        NDHWGC,
                                        GKZYXC,
                                        NDHWGK,
-                                       ConvBwdWeightFilter1x1Stride1Pad0>{});
+                                       ConvBwdWeightFilter1x1Stride1Pad0,
+                                       BlockGemmPipelineScheduler::Intrawave,
+                                       BlockGemmPipelineVersion::v2>{});
 }
 } // namespace instance
...
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Compilation parameters for in[n, di, hi, wi, g, c] * wei[g, k, z, y, x, c] = out[n, do, ho, wo, g, k]
void add_device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvBwdWeight<3,
                                                           NDHWGC,
                                                           GKZYXC,
                                                           NDHWGK,
                                                           F32,
                                                           F32,
                                                           F32,
                                                           PassThrough,
                                                           PassThrough,
                                                           PassThrough>>>& instances)
{
    add_device_operation_instances(instances,
                                   device_grouped_conv_bwd_weight_xdl_c_shuffle_f32_instances<
                                       3,
                                       NDHWGC,
                                       GKZYXC,
                                       NDHWGK,
                                       ConvBwdWeightFilter1x1Stride1Pad0,
                                       BlockGemmPipelineScheduler::Intrawave,
                                       BlockGemmPipelineVersion::v5>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
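
The Filter1x1Stride1Pad0 specialization used above exploits the fact that with a 1x1x1 filter, unit stride, and zero padding the input index equals the output index, so the weight gradient collapses to one GEMM per group over the flattened batch-and-spatial dimension. A hypothetical scalar sketch of that reduction (not CK code):

#include <vector>

// wei[g][k][c] = sum over m of out[m][g][k] * in[m][g][c], where m flattens (n, d, h, w).
void bwd_weight_1x1_as_gemm(const std::vector<float>& in,  // [M][G][C]
                            std::vector<float>& wei,       // [G][K][C]
                            const std::vector<float>& out, // [M][G][K]
                            int M, int G, int C, int K)
{
    for(int g = 0; g < G; ++g)
        for(int k = 0; k < K; ++k)
            for(int c = 0; c < C; ++c)
            {
                float acc = 0.f;
                for(int m = 0; m < M; ++m)
                    acc += out[(m * G + g) * K + k] * in[(m * G + g) * C + c];
                wei[(g * K + k) * C + c] = acc;
            }
}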
@@ -7,7 +7,7 @@ if(GPU_TARGETS MATCHES "gfx9" OR DL_KERNELS)
 endif()
 add_gtest_executable(test_grouped_convnd_bwd_weight_interface_xdl test_grouped_convnd_bwd_weight_interface_xdl.cpp)
 if(result EQUAL 0)
-    target_link_libraries(test_grouped_convnd_bwd_weight_interface_xdl PRIVATE utility)
+    target_link_libraries(test_grouped_convnd_bwd_weight_interface_xdl PRIVATE utility device_grouped_conv1d_bwd_weight_instance device_grouped_conv2d_bwd_weight_instance device_grouped_conv3d_bwd_weight_instance)
 endif()
 add_gtest_executable(test_grouped_convnd_bwd_weight_interface_wmma test_grouped_convnd_bwd_weight_interface_wmma.cpp)
 if(result EQUAL 0)
...
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

 #include <cstdlib>
 #include <iostream>
@@ -12,69 +12,143 @@
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp"
+#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp"
+#include "ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp"
+#include "ck/library/utility/check_err.hpp"
 #include "ck/library/utility/convolution_parameter.hpp"
 #include "ck/library/utility/algorithm.hpp"
 #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
+#include "ck/library/utility/host_tensor_generator.hpp"
+#include "ck/library/utility/device_memory.hpp"

 #include <gtest/gtest.h>

-using F16 = ck::half_t;
-using F32 = float;
+namespace ctl = ck::tensor_layout::convolution;

 using PassThrough = ck::tensor_operation::element_wise::PassThrough;

+using InDataType  = ck::bhalf_t;
+using WeiDataType = float;
+using OutDataType = ck::bhalf_t;
+using AccDataType = float;
+
 template <ck::index_t... Is>
 using S = ck::Sequence<Is...>;

-using ConvolutionBackwardWeightSpecialization =
-    ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization;
-
-static constexpr auto ConvBwdWeightDefault = ConvolutionBackwardWeightSpecialization::Default;
-static constexpr auto Filter1x1Stride1Pad0 =
-    ConvolutionBackwardWeightSpecialization::Filter1x1Stride1Pad0;
-
-template <typename Tuple, ConvolutionBackwardWeightSpecialization ConvSpec>
-class TestGroupedConvndBwdWeight : public ::testing::Test
-{
-    protected:
-    static constexpr ck::index_t NDimSpatial = 2;
-
-    using InLayout  = std::tuple_element_t<2, Tuple>;
-    using WeiLayout = std::tuple_element_t<1, Tuple>;
-    using OutLayout = std::tuple_element_t<0, Tuple>;
-
-    // clang-format off
-    using GroupedConvBwdWeightDeviceInstance = ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle
-        //##########| Num| InLayout| WeiLayout| OutLayout| InData| WeiData| OutData| AccData| In| Wei| Out| ConvBackward| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransfer| CBlockTransfer|
-        //##########| Dim| | | | Type| Type| Type| Type| Elementwise| Elementwise| Elementwise| Weight| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| ClusterLengths| ScalarPerVector|
-        //##########| Spatial| | | | | | | | Operation| Operation| Operation| Specialization| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| MBlock_MPerBlock| NWaveNPerXdl|
-        //##########| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | NBlock_NPerBlock| |
-        < NDimSpatial, InLayout, WeiLayout, OutLayout, F16, F16, F16, F32, PassThrough, PassThrough, PassThrough, ConvSpec, 128, 32, 128, 4, 8, 32, 32, 1, 2, S<1, 4, 4, 8>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 1, true, S<1, 4, 16, 2>, S<0, 3, 1, 2>, S<0, 2, 1, 3>, 2, 8, 4, true, 1, 1, S<1, 32, 1, 4>, 8>;
-    // clang-format on
+static constexpr auto ConvBwdWeightDefault =
+    ck::tensor_operation::device::ConvolutionBackwardWeightSpecialization::Default;
+
+template <typename InputLay, typename WeightLay, typename OutputLay>
+struct CommonLayoutSetting
+{
+    using InputLayout  = InputLay;
+    using WeightLayout = WeightLay;
+    using OutputLayout = OutputLay;
+};
+
+template <ck::index_t NDimSpatial>
+struct CommonLayoutSettingSelector
+    : CommonLayoutSetting<ck::tuple_element_t<NDimSpatial - 1,
+                                              ck::Tuple<ck::tensor_layout::convolution::GNWC,
+                                                        ck::tensor_layout::convolution::GNHWC,
+                                                        ck::tensor_layout::convolution::GNDHWC>>,
+                          ck::tuple_element_t<NDimSpatial - 1,
+                                              ck::Tuple<ck::tensor_layout::convolution::GKXC,
+                                                        ck::tensor_layout::convolution::GKYXC,
+                                                        ck::tensor_layout::convolution::GKZYXC>>,
+                          ck::tuple_element_t<NDimSpatial - 1,
+                                              ck::Tuple<ck::tensor_layout::convolution::GNWK,
+                                                        ck::tensor_layout::convolution::GNHWK,
+                                                        ck::tensor_layout::convolution::GNDHWK>>>
+{
+};
+
+template <ck::index_t NDimSpatial>
+using InputLayout = typename CommonLayoutSettingSelector<NDimSpatial>::InputLayout;
+
+template <ck::index_t NDimSpatial>
+using WeightLayout = typename CommonLayoutSettingSelector<NDimSpatial>::WeightLayout;
+
+template <ck::index_t NDimSpatial>
+using OutputLayout = typename CommonLayoutSettingSelector<NDimSpatial>::OutputLayout;
+
+class TestGroupedConvndBwdWeight : public ::testing::Test
+{
+    protected:
     ck::utils::conv::ConvParam conv_param;
-    ck::index_t split_k{2};

     template <ck::index_t NDimSpatial>
-    bool Run()
+    void RunReference(Tensor<InDataType>& in,
+                      Tensor<WeiDataType>& wei_host_result,
+                      Tensor<OutDataType>& out)
+    {
+        auto ref_conv = ck::tensor_operation::host::ReferenceConvBwdWeight<NDimSpatial,
+                                                                           InDataType,
+                                                                           WeiDataType,
+                                                                           OutDataType,
+                                                                           PassThrough,
+                                                                           PassThrough,
+                                                                           PassThrough>{};
+        auto ref_invoker  = ref_conv.MakeInvoker();
+        auto ref_argument = ref_conv.MakeArgument(in,
+                                                  wei_host_result,
+                                                  out,
+                                                  conv_param.conv_filter_strides_,
+                                                  conv_param.conv_filter_dilations_,
+                                                  conv_param.input_left_pads_,
+                                                  conv_param.input_right_pads_,
+                                                  PassThrough{},
+                                                  PassThrough{},
+                                                  PassThrough{},
+                                                  {},
+                                                  {},
+                                                  {});
+        ref_invoker.Run(ref_argument);
+    }
+
+    template <ck::index_t NDimSpatial>
+    bool PerformConvWeight(ck::index_t split_k)
     {
+        bool passed{true};
+
         const auto in_g_n_c_wis_desc =
-            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
-                conv_param);
+            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<
+                InputLayout<NDimSpatial>>(conv_param);
         const auto wei_g_k_c_xs_desc =
-            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
-                conv_param);
+            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<
+                WeightLayout<NDimSpatial>>(conv_param);
         const auto out_g_n_k_wos_desc =
-            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
-                conv_param);
+            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<
+                OutputLayout<NDimSpatial>>(conv_param);
+
+        Tensor<InDataType> in(in_g_n_c_wis_desc);
+        Tensor<WeiDataType> wei_host_result(wei_g_k_c_xs_desc);
+        Tensor<WeiDataType> wei_device_result(wei_g_k_c_xs_desc);
+        Tensor<OutDataType> out(out_g_n_k_wos_desc);
+
+        in.GenerateTensorValue(GeneratorTensor_2<InDataType>{-5, 5});
+        out.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
+
+        DeviceMem in_device_buf(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
+        DeviceMem wei_device_buf(sizeof(WeiDataType) *
+                                 wei_device_result.mDesc.GetElementSpaceSize());
+        DeviceMem out_device_buf(sizeof(OutDataType) * out.mDesc.GetElementSpaceSize());
+
+        in_device_buf.ToDevice(in.mData.data());
+        out_device_buf.ToDevice(out.mData.data());
+        // init to 0
+        wei_device_buf.SetZero();

         std::array<ck::index_t, NDimSpatial + 3> input_lengths{};
-        std::array<ck::index_t, NDimSpatial + 3> filter_lengths{};
-        std::array<ck::index_t, NDimSpatial + 3> output_lengths{};
         std::array<ck::index_t, NDimSpatial + 3> input_strides{};
+        std::array<ck::index_t, NDimSpatial + 3> filter_lengths{};
         std::array<ck::index_t, NDimSpatial + 3> weights_strides{};
+        std::array<ck::index_t, NDimSpatial + 3> output_lengths{};
         std::array<ck::index_t, NDimSpatial + 3> output_strides{};
         std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
         std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
@@ -94,86 +168,261 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
         range_copy(conv_param.input_left_pads_, begin(input_left_pads));
         range_copy(conv_param.input_right_pads_, begin(input_right_pads));

-        auto conv     = GroupedConvBwdWeightDeviceInstance{};
-        auto argument = conv.MakeArgument(nullptr,
-                                          nullptr,
-                                          nullptr,
-                                          input_lengths,
-                                          input_strides,
-                                          filter_lengths,
-                                          weights_strides,
-                                          output_lengths,
-                                          output_strides,
-                                          conv_filter_strides,
-                                          conv_filter_dilations,
-                                          input_left_pads,
-                                          input_right_pads,
-                                          PassThrough{},
-                                          PassThrough{},
-                                          PassThrough{},
-                                          split_k);
-        return conv.IsSupportedArgument(argument);
-    }
-};
-
-using GNHWC = ck::tensor_layout::convolution::GNHWC;
-using NHWGC = ck::tensor_layout::convolution::NHWGC;
-
-using GKYXC = ck::tensor_layout::convolution::GKYXC;
-
-using GNHWK = ck::tensor_layout::convolution::GNHWK;
-using NHWGK = ck::tensor_layout::convolution::NHWGK;
-
-using KernelTypes =
-    ::testing::Types<std::tuple<GNHWK, GKYXC, GNHWC>, std::tuple<NHWGK, GKYXC, NHWGC>>;
-
-template <typename Tuple>
-class TestGroupedConvndBwdWeightDefault
-    : public TestGroupedConvndBwdWeight<Tuple, ConvBwdWeightDefault>
-{
-};
-
-template <typename Tuple>
-class TestGroupedConvndBwdWeightFilter1x1
-    : public TestGroupedConvndBwdWeight<Tuple, Filter1x1Stride1Pad0>
-{
-};
-
-TYPED_TEST_SUITE(TestGroupedConvndBwdWeightDefault, KernelTypes);
-TYPED_TEST_SUITE(TestGroupedConvndBwdWeightFilter1x1, KernelTypes);
-
-TYPED_TEST(TestGroupedConvndBwdWeightFilter1x1, SpecializationCheck)
-{
-    // Check filter 3,3 instead of 1,1
-    this->conv_param = {2, 2, 4, 192, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
-    bool is_supported = this->template Run<2>();
-    EXPECT_FALSE(is_supported);
-
-    // Check strides 2,2 instead of 1,1
-    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
-    is_supported = this->template Run<2>();
-    EXPECT_FALSE(is_supported);
-
-    // Check with pad
-    this->conv_param = {2, 2, 4, 192, 192, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}};
-    is_supported = this->template Run<2>();
-    EXPECT_FALSE(is_supported);
-
-    // Supported version
-    this->conv_param = {2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
-    is_supported = this->template Run<2>();
-    EXPECT_TRUE(is_supported);
-}
-
-TYPED_TEST(TestGroupedConvndBwdWeightDefault, VectorLoadCheck)
-{
-    // vector load for A
-    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
-    bool is_supported = this->template Run<2>();
-    EXPECT_FALSE(is_supported);
-
-    // vector load for B, E, Ds
-    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
-    is_supported = this->template Run<2>();
-    EXPECT_FALSE(is_supported);
-}
+        RunReference<NDimSpatial>(in, wei_host_result, out);
+
+        using DeviceOp = ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle<
+            NDimSpatial,
+            ck::tuple_element_t<NDimSpatial - 1,
+                                ck::Tuple<ck::tensor_layout::convolution::GNWC,
+                                          ck::tensor_layout::convolution::GNHWC,
+                                          ck::tensor_layout::convolution::GNDHWC>>,
+            ck::tuple_element_t<NDimSpatial - 1,
+                                ck::Tuple<ck::tensor_layout::convolution::GKXC,
+                                          ck::tensor_layout::convolution::GKYXC,
+                                          ck::tensor_layout::convolution::GKZYXC>>,
+            ck::tuple_element_t<NDimSpatial - 1,
+                                ck::Tuple<ck::tensor_layout::convolution::GNWK,
+                                          ck::tensor_layout::convolution::GNHWK,
+                                          ck::tensor_layout::convolution::GNDHWK>>,
+            InDataType,           // InDataType
+            WeiDataType,          // WeiDataType
+            OutDataType,          // OutDataType
+            AccDataType,          // AccDataType
+            PassThrough,          // InElementwiseOperation
+            PassThrough,          // WeiElementwiseOperation
+            PassThrough,          // OutElementwiseOperation
+            ConvBwdWeightDefault, // ConvolutionBackwardWeightSpecialization
+            64,                   // BlockSize
+            16,                   // MPerBlock
+            16,                   // NPerBlock
+            32,                   // K0PerBlock
+            8,                    // K1
+            16,                   // MPerXdl
+            16,                   // NPerXdl
+            1,                    // MXdlPerWave
+            1,                    // NXdlPerWave
+            S<4, 16, 1>,          // ABlockTransferThreadClusterLengths_K0_M_K1
+            S<2, 0, 1>,           // ABlockTransferThreadClusterArrangeOrder
+            S<1, 0, 2>,           // ABlockTransferSrcAccessOrder
+            1,                    // ABlockTransferSrcVectorDim
+            1,                    // ABlockTransferSrcScalarPerVector
+            4,                    // ABlockTransferDstScalarPerVector_K1
+            false,                // ABlockLdsAddExtraM
+            S<4, 16, 1>,          // BBlockTransferThreadClusterLengths_K0_N_K1
+            S<2, 0, 1>,           // BBlockTransferThreadClusterArrangeOrder
+            S<1, 0, 2>,           // BBlockTransferSrcAccessOrder
+            1,                    // BBlockTransferSrcVectorDim
+            1,                    // BBlockTransferSrcScalarPerVector
+            4,                    // BBlockTransferDstScalarPerVector_K1
+            false,                // BBlockLdsAddExtraN
+            1,                    // CShuffleMXdlPerWavePerShuffle
+            1,                    // CShuffleNXdlPerWavePerShuffle
+            S<1, 8, 1, 8>,        // CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
+            1>;                   // CBlockTransferScalarPerVector_NWaveNPerXdl
+
+        auto conv_ptr = DeviceOp{};
+        auto argument =
+            conv_ptr.MakeArgument(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
+                                  static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
+                                  static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
+                                  input_lengths,
+                                  input_strides,
+                                  filter_lengths,
+                                  weights_strides,
+                                  output_lengths,
+                                  output_strides,
+                                  conv_filter_strides,
+                                  conv_filter_dilations,
+                                  input_left_pads,
+                                  input_right_pads,
+                                  PassThrough{},
+                                  PassThrough{},
+                                  PassThrough{},
+                                  split_k);
+        auto invoker_ptr = conv_ptr.MakeInvoker();
+
+        if(conv_ptr.IsSupportedArgument(argument))
+        {
+            float avg_time = invoker_ptr.Run(argument, StreamConfig{nullptr, false});
+
+            wei_device_buf.FromDevice(wei_device_result.mData.data());
+            passed &= ck::utils::check_err(
+                wei_device_result.mData, wei_host_result.mData, "Error: incorrect results!");
+
+            std::size_t flop = conv_param.GetFlops() +
+                               3 * conv_param.GetOutputByte<WeiDataType>() / sizeof(WeiDataType);
+            std::size_t num_bytes = conv_param.GetByte<InDataType, WeiDataType, OutDataType>() +
+                                    conv_param.GetOutputByte<WeiDataType>();
+            float tflops     = static_cast<float>(flop) / 1.E9 / avg_time;
+            float gb_per_sec = num_bytes / 1.E6 / avg_time;
+            std::cout << "Perf: " << std::setw(10) << avg_time << " ms, " << tflops << " TFlops, "
+                      << gb_per_sec << " GB/s, "
+                      << "split_k " << split_k << std::endl;
+        }
+        return passed;
+    }
+
+    template <ck::index_t NDimSpatial>
+    void Run()
+    {
+        bool pass = true;
+        for(auto split_k : {1, 2})
+        {
+            pass = pass && PerformConvWeight<NDimSpatial>(split_k);
+            EXPECT_TRUE(pass);
+        }
+    }
+};
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_Filter_1x1)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_Filter_3x3)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {3, 3, 3}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_Filter_1x1)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_Filter_3x3)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {3, 3, 3}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_Filter_1x1)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_Filter_3x3)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {3, 3, 3}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_Stride_1x1)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_Stride_2x2)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_Stride_1x1)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_Stride_2x2)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_Stride_1x1)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_Stride_2x2)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_WithPadding)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_WithPadding)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_WithPadding)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_SupportedVersion)
+{
+    this->conv_param = {
+        1, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_SupportedVersion)
+{
+    this->conv_param = {
+        2, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_3_SupportedVersion)
+{
+    this->conv_param = {
+        3, 2, 4, 192, 192, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+    this->template Run<3>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_VectorLoadForA)
+{
+    this->conv_param = {1, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_VectorLoadForA)
+{
+    this->conv_param = {2, 2, 128, 129, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
+    this->template Run<2>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_1_VectorLoadForB_E_DS)
+{
+    this->conv_param = {1, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
+    this->template Run<1>();
+}
+
+TEST_F(TestGroupedConvndBwdWeight, TestGroupedConvndBwdWeight_NDimSpatial_2_VectorLoadForB_E_DS)
+{
+    this->conv_param = {2, 2, 128, 128, 257, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
+    this->template Run<2>();
+}
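
A quick unit check on the Perf line printed by the new test (illustrative helpers, not CK code): avg_time is reported in milliseconds, so flop / 1e9 / avg_time equals flop / 1e12 per second, i.e. TFLOP/s, and num_bytes / 1e6 / avg_time likewise yields GB/s:

#include <cstddef>

float tflops_from(std::size_t flop, float avg_time_ms)
{
    // flop / 1e9 per millisecond == flop / 1e12 per second == TFLOP/s
    return static_cast<float>(flop) / 1.0e9f / avg_time_ms;
}

float gb_per_sec_from(std::size_t num_bytes, float avg_time_ms)
{
    // bytes / 1e6 per millisecond == bytes / 1e9 per second == GB/s
    return static_cast<float>(num_bytes) / 1.0e6f / avg_time_ms;
}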