Commit 05c484e2 authored by Chao Liu's avatar Chao Liu
Browse files

adding group conv

parent 3474c777
...@@ -93,7 +93,7 @@ void print_helper_msg() ...@@ -93,7 +93,7 @@ void print_helper_msg()
<< "Following arguments (depending on number of spatial dims):\n" << "Following arguments (depending on number of spatial dims):\n"
<< " N, K, C, \n" << " N, K, C, \n"
<< " <filter spatial dimensions>, (ie Y, X for 2D)\n" << " <filter spatial dimensions>, (ie Y, X for 2D)\n"
<< " <in_n_hi_wi_c image spatial dimensions>, (ie Hi, Wi for 2D)\n" << " <input image spatial dimensions>, (ie Hi, Wi for 2D)\n"
<< " <strides>, (ie Sy, Sx for 2D)\n" << " <strides>, (ie Sy, Sx for 2D)\n"
<< " <dilations>, (ie Dy, Dx for 2D)\n" << " <dilations>, (ie Dy, Dx for 2D)\n"
<< " <left padding>, (ie LeftPy, LeftPx for 2D)\n" << " <left padding>, (ie LeftPy, LeftPx for 2D)\n"
......
add_example_executable(example_group_convnd_fwd_bias_relu_xdl_fp16 group_convnd_fwd_bias_relu_xdl_fp16.cpp)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "../09_convnd_fwd/convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_nwc_kxc_nwk_xdl.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp"
// fp16 in/weights/out with fp32 accumulation.
using InDataType = ck::half_t;
using WeiDataType = ck::half_t;
using OutDataType = ck::half_t;
using AccDataType = float;
// Shorthand for compile-time integer sequences used in the tuning parameters below.
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
// NOTE(review): output op is UnaryConvert and the Ds tuple below is empty, yet the
// example is named "bias_relu" — confirm whether a bias/relu epilogue was intended.
using OutElementOp = ck::tensor_operation::element_wise::UnaryConvert;
using CShuffleDataType = ck::half_t;
static constexpr auto ConvSpec =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
static constexpr auto GemmSpec = ck::tensor_operation::device::GemmSpecialization::MNKPadding;
// Tuned XDL (matrix-core) forward-convolution device instance, parameterized on the
// number of spatial dimensions (1/2/3). The positional arguments are block/wave/thread
// tiling parameters of the underlying implicit-GEMM kernel.
template <ck::index_t NDimSpatial>
using DeviceConvNDFwdInstance =
ck::tensor_operation::device::DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle<
NDimSpatial, //
InDataType, //
WeiDataType, //
AccDataType, //
CShuffleDataType, //
ck::Tuple<>, // DsDataType: no extra D tensors fused into the output
OutDataType, //
InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation
ConvSpec, // ConvForwardSpecialization
GemmSpec, // GemmSpecialization
1, // presumably NumGemmKPrefetchStage — TODO confirm against the device op
256, // BlockSize
128, // MPerBlock
256, // NPerBlock
32, // KPerBlock
8, // K1
32, // MPerXdl
32, // NPerXdl
2, // MXdlPerWave
4, // NXdlPerWave
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim
8, // ABlockTransferSrcScalarPerVector
8, // ABlockTransferDstScalarPerVector_K1
1, // ABlockLdsExtraM
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
8, // BBlockTransferDstScalarPerVector_K1
1, // BBlockLdsExtraN
1, // presumably CShuffleMXdlPerWavePerShuffle — TODO confirm
1, // presumably CShuffleNXdlPerWavePerShuffle — TODO confirm
S<1, 32, 1, 8>, // CDE block-transfer thread cluster lengths (see device op decl)
8>; // CDEBlockTransferScalarPerVector_NPerBlock
/// Example driver: runs an XDL forward convolution (1/2/3 spatial dims) and
/// optionally verifies against a reference.
///
/// Usage:
///   <exe>                                  — run the default 2D problem
///   <exe> verify init time                 — override run flags only
///   <exe> verify init time ndim <params…>  — full problem specification
///
/// Returns run_conv_fwd's status for supported dims, 1 on bad usage.
int main(int argc, char* argv[])
{
    print_helper_msg();

    bool do_verification = true;
    int init_method      = 1; // initialization scheme for input/weight tensors
    bool time_kernel     = false;

    int num_dim_spatial = 2;

    // Default problem: 2D conv, N=128, K=256, C=192, 3x3 filter, 71x71 input,
    // stride 2, dilation 1, pad 1.
    ck::utils::conv::ConvParam params{
        2, 128, 256, 192, {3, 3}, {71, 71}, {2, 2}, {1, 1}, {1, 1}, {1, 1}};

    if(argc == 1)
    {
        // use default
    }
    else if(argc == 4)
    {
        do_verification = std::stoi(argv[1]);
        init_method     = std::stoi(argv[2]);
        time_kernel     = std::stoi(argv[3]);
    }
    else if(argc >= 5)
    {
        do_verification = std::stoi(argv[1]);
        init_method     = std::stoi(argv[2]);
        time_kernel     = std::stoi(argv[3]);
        num_dim_spatial = std::stoi(argv[4]);

        // parse_conv_params consumes the remaining arguments starting at argv[5]
        params = parse_conv_params(num_dim_spatial, 5, argv);
    }
    else
    {
        // argc == 2 or 3: the original fell through and read argv[2]/argv[3],
        // dereferencing argv[argc] (a null pointer) — reject instead.
        std::cerr << "wrong number of arguments" << std::endl;
        return 1;
    }

    const auto in_element_op  = InElementOp{};
    const auto wei_element_op = WeiElementOp{};
    const auto out_element_op = OutElementOp{};

    if(num_dim_spatial == 1)
    {
        return run_conv_fwd<1,
                            ck::tensor_layout::convolution::NWC,
                            ck::tensor_layout::convolution::KXC,
                            ck::tensor_layout::convolution::NWK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
                            InElementOp,
                            WeiElementOp,
                            OutElementOp,
                            DeviceConvNDFwdInstance<1>>(do_verification,
                                                        init_method,
                                                        time_kernel,
                                                        params,
                                                        in_element_op,
                                                        wei_element_op,
                                                        out_element_op);
    }
    else if(num_dim_spatial == 2)
    {
        return run_conv_fwd<2,
                            ck::tensor_layout::convolution::NHWC,
                            ck::tensor_layout::convolution::KYXC,
                            ck::tensor_layout::convolution::NHWK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
                            InElementOp,
                            WeiElementOp,
                            OutElementOp,
                            DeviceConvNDFwdInstance<2>>(do_verification,
                                                        init_method,
                                                        time_kernel,
                                                        params,
                                                        in_element_op,
                                                        wei_element_op,
                                                        out_element_op);
    }
    else if(num_dim_spatial == 3)
    {
        return run_conv_fwd<3,
                            ck::tensor_layout::convolution::NDHWC,
                            ck::tensor_layout::convolution::KZYXC,
                            ck::tensor_layout::convolution::NDHWK,
                            InDataType,
                            WeiDataType,
                            OutDataType,
                            InElementOp,
                            WeiElementOp,
                            OutElementOp,
                            DeviceConvNDFwdInstance<3>>(do_verification,
                                                        init_method,
                                                        time_kernel,
                                                        params,
                                                        in_element_op,
                                                        wei_element_op,
                                                        out_element_op);
    }

    // Previously this silently returned 0 for unsupported dims; report it instead.
    std::cerr << "unsupported num_dim_spatial: " << num_dim_spatial << std::endl;
    return 1;
}
...@@ -44,3 +44,4 @@ add_subdirectory(24_batched_gemm_c_permute) ...@@ -44,3 +44,4 @@ add_subdirectory(24_batched_gemm_c_permute)
add_subdirectory(25_gemm_bias_c_permute) add_subdirectory(25_gemm_bias_c_permute)
add_subdirectory(26_contraction) add_subdirectory(26_contraction)
add_subdirectory(27_layernorm) add_subdirectory(27_layernorm)
add_subdirectory(28_group_convnd_fwd_bias_relu)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
// Abstract device-op interface: convolution forward expressed as an implicit GEMM,
// with optional extra "D" tensors fused into the epilogue.
//   input  : input image A[N, Hi, Wi, C]
//   input  : weight B[K, Y, X, C]
//   input  : D0[N, Ho, Wo, K], D1[N, Ho, Wo, K], ...
//   output : output image E[N, Ho, Wo, K]
//   C = a_op(A) * b_op(B)
//   E = cde_op(C, D0, D1, ...)
// Assume:
//   D0, D1, ... and E have the same layout (DELayout)
template <ck::index_t NDimSpatial,
typename ALayout,
typename BLayout,
typename DELayout,
typename ADataType,
typename BDataType,
typename DsDataType,
typename EDataType,
typename AElementwiseOperation,
typename BElementwiseOperation,
typename CDEElementwiseOperation>
struct DeviceConvFwdMultipleD : public BaseOperator
{
// Number of fused D tensors, taken from the DsDataType tuple.
static constexpr index_t NumDTensor = DsDataType::Size();
// Build a type-erased argument object for a concrete problem size.
// NOTE(review): this signature takes no D-tensor pointers even though
// NumDTensor may be > 0 — confirm how derived implementations receive Ds.
virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const ADataType* p_a,
const BDataType* p_b,
EDataType* p_e,
ck::index_t N,
ck::index_t K,
ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads,
std::vector<ck::index_t> input_right_pads,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CDEElementwiseOperation cde_element_op) = 0;
// Build the invoker that launches the kernel for an argument created above.
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_conv_fwd.hpp" #include "ck/tensor_operation/gpu/device/device_conv_fwd_multiple_d.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp" #include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp" #include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
...@@ -151,25 +151,27 @@ template <index_t NDimSpatial, ...@@ -151,25 +151,27 @@ template <index_t NDimSpatial,
index_t CDEBlockTransferScalarPerVector_NPerBlock, index_t CDEBlockTransferScalarPerVector_NPerBlock,
LoopScheduler LoopSched = make_default_loop_scheduler()> LoopScheduler LoopSched = make_default_loop_scheduler()>
struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
: public DeviceConvFwd<NDimSpatial, : public DeviceConvFwdMultipleD<
ck::tuple_element_t<NDimSpatial - 1, NDimSpatial,
ck::Tuple<ck::tensor_layout::convolution::NWC, ck::tuple_element_t<NDimSpatial - 1,
ck::tensor_layout::convolution::NHWC, ck::Tuple<ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::NDHWC>>, ck::tensor_layout::convolution::NHWC,
ck::tuple_element_t<NDimSpatial - 1, ck::tensor_layout::convolution::NDHWC>>,
ck::Tuple<ck::tensor_layout::convolution::KXC, ck::tuple_element_t<NDimSpatial - 1,
ck::tensor_layout::convolution::KYXC, ck::Tuple<ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::KZYXC>>, ck::tensor_layout::convolution::KYXC,
ck::tuple_element_t<NDimSpatial - 1, ck::tensor_layout::convolution::KZYXC>>,
ck::Tuple<ck::tensor_layout::convolution::NWK, ck::tuple_element_t<NDimSpatial - 1,
ck::tensor_layout::convolution::NHWK, ck::Tuple<ck::tensor_layout::convolution::NWK,
ck::tensor_layout::convolution::NDHWK>>, ck::tensor_layout::convolution::NHWK,
ADataType, ck::tensor_layout::convolution::NDHWK>>,
BDataType, ADataType,
EDataType, BDataType,
AElementwiseOperation, DsDataType,
BElementwiseOperation, EDataType,
CDEElementwiseOperation> AElementwiseOperation,
BElementwiseOperation,
CDEElementwiseOperation>
{ {
using DeviceOp = DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle; using DeviceOp = DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle;
...@@ -1130,9 +1132,9 @@ struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle ...@@ -1130,9 +1132,9 @@ struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
static auto MakeInvoker() { return Invoker{}; } static auto MakeInvoker() { return Invoker{}; }
std::unique_ptr<BaseArgument> std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_in_grid, MakeArgumentPointer(const ADataType* p_in_grid,
const void* p_wei_grid, const BDataType* p_wei_grid,
void* p_out_grid, EDataType* p_out_grid,
index_t N, index_t N,
index_t K, index_t K,
index_t C, index_t C,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment