Commit 41852668 authored by Jing Zhang's avatar Jing Zhang
Browse files

add activ_type as arguments

parent e5c9f039
......@@ -90,9 +90,9 @@
#endif
// pass tensor descriptor by value or void*
#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VALUE 1
#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VALUE 0
#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VOID_POINTER 0
#define CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR 0
#define CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR 1
// merge transformation use magic number division
#define CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION 0
......
......@@ -6,7 +6,7 @@
template <typename TInWei,
typename TAcc,
typename TOut,
ck::index_t activ_type,
ck::ActivTypeEnum_t activ_type,
typename InLengths,
typename WeiLengths,
typename MaxLengths,
......
......@@ -87,16 +87,22 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
#if CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR
const auto Hop = Number<(Ho + HoPerBlock - 1) / HoPerBlock * HoPerBlock>{};
const auto Wop = Number<(Wo + WoPerBlock - 1) / WoPerBlock * WoPerBlock>{};
const auto OutRightPadH = Hop - Ho;
const auto OutRightPadW = Wop - Wo;
const auto OutRightPadHx = Number<OutRightPadH * 2>{};
const auto OutRightPadWx = Number<OutRightPadW * 2>{};
#else
const auto Hop = (Ho + HoPerBlock - 1) / HoPerBlock * HoPerBlock;
const auto Wop = (Wo + WoPerBlock - 1) / WoPerBlock * WoPerBlock;
#endif
const auto OutRightPadH = Hop - Ho;
const auto OutRightPadW = Wop - Wo;
const auto OutRightPadHx = OutRightPadH * 2;
const auto OutRightPadWx = OutRightPadW * 2;
#endif
const auto InLeftPadH = in_left_pads[I0];
const auto InLeftPadW = in_left_pads[I1];
......@@ -382,7 +388,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_H2x2_W0_W1_W2x2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -410,7 +417,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_H2x2_W0_W1_W2x2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -461,7 +469,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_H2x2_W0_W1_W2x2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -495,7 +504,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_H2x2_W0_W1_W2x2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -526,7 +536,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert(c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.IsKnownAtCompileTime(), "");
static_assert(c_blockid_to_k_n_h_w_block_cluster_adaptor.IsKnownAtCompileTime(), "");
const auto kernel = kernel_gemm_dlops_v2_resize_add<
const auto kernel = kernel_gemm_dlops_v3_resize_add<
GridwiseGemm,
FloatAB,
FloatC,
......@@ -535,7 +545,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_H2x2_W0_W1_W2x2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
has_main_e0_block_loop>;
has_main_e0_block_loop,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......
......@@ -337,7 +337,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<BGridDesc_E0_E1_N_H0_H1_H2_W0_W1_W2_E2>,
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -363,7 +364,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<BGridDesc_E0_E1_N_H0_H1_H2_W0_W1_W2_E2>,
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -408,7 +410,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<BGridDesc_E0_E1_N_H0_H1_H2_W0_W1_W2_E2>,
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -440,7 +443,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<BGridDesc_E0_E1_N_H0_H1_H2_W0_W1_W2_E2>,
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -469,14 +473,15 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert(c_blockid_to_k_n_h_w_block_cluster_adaptor.IsKnownAtCompileTime(), "");
const auto kernel =
kernel_gemm_dlops_v2<GridwiseGemm,
kernel_gemm_dlops_v3<GridwiseGemm,
FloatAB,
FloatC,
remove_reference_t<AGridDesc_E0_E1_K0_K1_E2>,
remove_reference_t<BGridDesc_E0_E1_N_H0_H1_H2_W0_W1_W2_E2>,
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
has_main_e0_block_loop>;
has_main_e0_block_loop,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......
......@@ -27,7 +27,7 @@ template <ck::index_t BlockSize,
ck::index_t ABlockTransferDstScalarPerVector_E2,
ck::index_t BThreadTransferSrcScalarPerVector_E2,
ck::index_t CThreadTransferDstScalarPerVector_K,
ck::index_t activ_type>
ck::ActivTypeEnum_t activ_type>
struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0hwk1_maxpool
{
template <typename... Wei,
......@@ -88,16 +88,22 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
#if CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR
const auto Hop = Number<(Ho + HoPerBlock - 1) / HoPerBlock * HoPerBlock>{};
const auto Wop = Number<(Wo + WoPerBlock - 1) / WoPerBlock * WoPerBlock>{};
const auto OutRightPadH = Hop - Ho;
const auto OutRightPadW = Wop - Wo;
const auto OutRightPadHx = Number<OutRightPadH / 2>{};
const auto OutRightPadWx = Number<OutRightPadW / 2>{};
#else
const auto Hop = (Ho + HoPerBlock - 1) / HoPerBlock * HoPerBlock;
const auto Wop = (Wo + WoPerBlock - 1) / WoPerBlock * WoPerBlock;
#endif
const auto OutRightPadH = Hop - Ho;
const auto OutRightPadW = Wop - Wo;
const auto OutRightPadHx = OutRightPadH / 2;
const auto OutRightPadWx = OutRightPadW / 2;
#endif
const auto InLeftPadH = in_left_pads[I0];
const auto InLeftPadW = in_left_pads[I1];
......@@ -380,7 +386,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_Hx_W0_W1_Wx>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -409,7 +416,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_Hx_W0_W1_Wx>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......@@ -461,7 +469,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_Hx_W0_W1_Wx>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
true>;
true,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -497,7 +506,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_Hx_W0_W1_Wx>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
false>;
false,
activ_type>;
ave_time = launch_and_time_kernel(
kernel,
......@@ -529,7 +539,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
static_assert(c_k0_k1_n_h0_h1_h2_w0_w1_w2_grid_desc.IsKnownAtCompileTime(), "");
static_assert(c_blockid_to_k_n_h_w_block_cluster_adaptor.IsKnownAtCompileTime(), "");
const auto kernel = kernel_gemm_dlops_v2_maxpool<
const auto kernel = kernel_gemm_dlops_v3_maxpool<
GridwiseGemm,
FloatAB,
FloatC,
......@@ -538,7 +548,8 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
remove_reference_t<CGridDesc_K0_K1_N_H0_H1_H2_W0_W1_W2>,
remove_reference_t<DGridDesc_K0_K1_N_H0_H1_Hx_W0_W1_Wx>,
remove_reference_t<CBlockIdToBlockClusterAdaptor_K_N_H_W>,
has_main_e0_block_loop>;
has_main_e0_block_loop,
activ_type>;
ave_time = launch_and_time_kernel(kernel,
nrepeat,
......
......@@ -15,7 +15,7 @@
#include "device_tensor.hpp"
#include "device_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp"
#define USE_DYNAMIC_MODE 1
#define USE_DYNAMIC_MODE 0
#define USE_CONV_FWD_V5R1_NCHWC 1
enum ConvForwardAlgo
......
......@@ -15,7 +15,7 @@
#include "device_tensor.hpp"
#include "device_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp"
#define USE_DYNAMIC_MODE 1
#define USE_DYNAMIC_MODE 0
#define USE_CONV_FWD_V5R1_NCHWC 1
enum ConvForwardAlgo
......
......@@ -15,7 +15,7 @@
#include "device_tensor.hpp"
#include "device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1.hpp"
#define USE_DYNAMIC_MODE 1
#define USE_DYNAMIC_MODE 0
#define USE_CONV_FWD_V5R1_NCHWC 1
enum ConvForwardAlgo
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment