gaoqiong / composable_kernel / Commits

Commit 1f8e8231
authored Dec 05, 2021 by Chao Liu

refactor

parent aa0a891a
Showing 3 changed files with 62 additions and 94 deletions (+62, -94)
device_operation/include/device_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp   +52 -79
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp                       +1 -6
example/4_conv_xdl_bias_relu_add/conv_xdl_bias_relu_add.cpp                             +9 -9
example/4_conv_xdl_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp
→ device_operation/include/device_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp
-#ifndef DEVICE_CONV_FWD_XDL_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
-#define DEVICE_CONV_FWD_XDL_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
+#ifndef DEVICE_CONV2D_FWD_XDL_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
+#define DEVICE_CONV2D_FWD_XDL_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
 #include <iostream>
+#include <sstream>
 #include "device.hpp"
 #include "device_base.hpp"
 #include "device_conv.hpp"
...
@@ -10,13 +11,13 @@
 #include "tensor_descriptor.hpp"
 #include "tensor_descriptor_helper.hpp"
 #include "gridwise_gemm_xdlops_v2r5.hpp"
-#include "example/4_conv_xdl_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add.hpp"

 namespace ck {
 namespace tensor_operation {
 namespace device {

-// specialization for 2D conv: in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
+// out[N, Ho, Wo, K] =
+//     activate(in[N, Hi, Wi, C] * wei[K, Y, X, C] + bias[K]) + residual[N, Ho, Wo, K]
 template <typename InDataType,
           typename WeiDataType,
           typename OutDataType,
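For reference, the rewritten header comment describes the fused operation that the example below wires up with OutElementOp = BiasReluAdd. A minimal per-element sketch of that fusion; the functor signature is assumed for illustration, not copied from this commit:

#include <algorithm>

// Illustrative sketch only: bias + ReLU + residual-add applied per output element.
// The real BiasReluAdd functor in the example may use a different signature.
struct BiasReluAddSketch
{
    template <typename T>
    void operator()(T& out, const T& conv_acc, const T& bias, const T& residual) const
    {
        const T x = conv_acc + bias;              // conv result plus per-channel bias[K]
        out       = (x > T{0} ? x : T{0})         // ReLU activation
                    + residual;                   // plus residual[N, Ho, Wo, K]
    }
};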
...
@@ -51,49 +52,12 @@ template <typename InDataType,
           ck::index_t CThreadTransferDstScalarPerVector,
           bool ABlockLdsAddExtraM,
           bool BBlockLdsAddExtraN>
-struct DeviceConvFwdXdl_bias_activation_add<
-    2,                                           // ck::index_t NDimSpatial,
-    InDataType,                                  // typename InDataType,
-    WeiDataType,                                 // typename WeiDataType,
-    OutDataType,                                 // typename OutDataType,
-    AccDataType,                                 // typename AccDataType,
-    ck::tensor_layout::convolution::NHWC,        // typename InLayout,
-    ck::tensor_layout::convolution::KYXC,        // typename WeiLayout,
-    ck::tensor_layout::convolution::NHWK,        // typename OutLayout,
-    InElementwiseOperation,                      // typename InElementwiseOperation,
-    WeiElementwiseOperation,                     // typename WeiElementwiseOperation,
-    OutElementwiseOperation,                     // typename OutElementwiseOperation,
-    BlockSize,                                   // ck::index_t BlockSize,
-    MPerBlock,                                   // ck::index_t MPerBlock,
-    NPerBlock,                                   // ck::index_t NPerBlock,
-    K0PerBlock,                                  // ck::index_t K0PerBlock,
-    K1,                                          // ck::index_t K1,
-    MPerXDL,                                     // ck::index_t MPerXDL,
-    NPerXDL,                                     // ck::index_t NPerXDL,
-    MXdlPerWave,                                 // ck::index_t MXdlPerWave,
-    NXdlPerWave,                                 // ck::index_t NXdlPerWave,
-    ABlockTransferThreadSliceLengths_K0_M_K1,    // typename ABlockTransferThreadSliceLengths_K0_M_K1,
-    ABlockTransferThreadClusterLengths_K0_M_K1,  // typename
-                                                 // ABlockTransferThreadClusterLengths_K0_M_K1,
-    ABlockTransferThreadClusterArrangeOrder,     // typename ABlockTransferThreadClusterArrangeOrder,
-    ABlockTransferSrcAccessOrder,                // typename ABlockTransferSrcAccessOrder,
-    ABlockTransferSrcVectorDim,                  // ck::index_t ABlockTransferSrcVectorDim,
-    ABlockTransferSrcScalarPerVector,            // ck::index_t ABlockTransferSrcScalarPerVector,
-    ABlockTransferDstScalarPerVector_K1,         // ck::index_t ABlockTransferDstScalarPerVector_K1,
-    BBlockTransferThreadSliceLengths_K0_N_K1,    // typename BBlockTransferThreadSliceLengths_K0_N_K1,
-    BBlockTransferThreadClusterLengths_K0_N_K1,  // typename
-                                                 // BBlockTransferThreadClusterLengths_K0_N_K1,
-    BBlockTransferThreadClusterArrangeOrder,     // typename BBlockTransferThreadClusterArrangeOrder,
-    BBlockTransferSrcAccessOrder,                // typename BBlockTransferSrcAccessOrder,
-    BBlockTransferSrcVectorDim,                  // ck::index_t BBlockTransferSrcVectorDim,
-    BBlockTransferSrcScalarPerVector,            // ck::index_t BBlockTransferSrcScalarPerVector,
-    BBlockTransferDstScalarPerVector_K1,         // ck::index_t BBlockTransferDstScalarPerVector_K1,
-    CThreadTransferSrcDstVectorDim,              // ck::index_t CThreadTransferSrcDstVectorDim,
-    CThreadTransferDstScalarPerVector,           // ck::index_t CThreadTransferDstScalarPerVector,
-    ABlockLdsAddExtraM,                          // bool ABlockLdsAddExtraM,
-    BBlockLdsAddExtraN                           // bool BBlockLdsAddExtraN>
-    > : public BaseOperator
+struct DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
+    : public BaseOperator
 {
+    using DeviceOp =
+        DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K;
+
     using ADataType = InDataType;
     using BDataType = WeiDataType;
     using CDataType = OutDataType;
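The partial specialization of the generic DeviceConvFwdXdl_bias_activation_add template is replaced by a standalone struct whose name already encodes the rank (2D) and the NHWC/KYXC/NHWK layouts, and the new DeviceOp alias keeps references inside the struct short. A minimal sketch of that self-alias idiom, with names shortened for illustration:

// Sketch: inside a struct with a very long name, a local alias lets member code
// (Argument, Invoker, static helpers) refer to the enclosing type compactly.
struct DeviceConv2dFwdXdl_Bias_Activation_Add_Sketch
{
    using DeviceOp = DeviceConv2dFwdXdl_Bias_Activation_Add_Sketch;

    static int MakeGridDescriptorSketch() { return 0; } // stand-in for MakeABCGridDescriptor_...

    struct Invoker
    {
        // "DeviceOp::" replaces spelling out the enclosing struct's full name.
        int Run() const { return DeviceOp::MakeGridDescriptorSketch(); }
    };
};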
...
@@ -241,7 +205,7 @@ struct DeviceConvFwdXdl_bias_activation_add<
         // C0: bias tensor: assume a contiguous vector
         const auto bias_grid_desc_gemmm_gemmn =
-            make_naive_tensor_descriptor(make_tuple(GemmM, GemmN), make_tuple(0, 1));
+            make_naive_tensor_descriptor(make_tuple(GemmM, GemmN), make_tuple(I0, I1));

         // C1: residual tensor: assume same layout as output tensor
         const auto resi_grid_desc_gemmm_gemmn = out_gemmm_gemmn_grid_desc;
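The bias-descriptor change swaps the runtime integers 0 and 1 for I0 and I1, which in CK are normally compile-time constants (ck::Number<0>{} and ck::Number<1>{}), so the zero stride along GemmM is known statically. Functionally the descriptor broadcasts the length-GemmN bias vector across every GemmM row; a minimal standalone sketch of that addressing (not CK code):

#include <cassert>
#include <cstddef>

// Sketch: a "naive" 2-D descriptor with strides (0, 1) maps (m, n) -> n,
// so every row m reads the same bias element bias[n], i.e. a broadcast.
std::size_t bias_offset(std::size_t m, std::size_t n)
{
    const std::size_t stride_m = 0; // I0 in the diff: broadcast over GemmM
    const std::size_t stride_n = 1; // I1 in the diff: contiguous over GemmN
    return m * stride_m + n * stride_n;
}

int main()
{
    assert(bias_offset(0, 5) == bias_offset(7, 5)); // same bias element for all rows
    return 0;
}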
...
@@ -407,17 +371,17 @@ struct DeviceConvFwdXdl_bias_activation_add<
               wei_element_op_{wei_element_op},
               out_element_op_{out_element_op}
         {
-            const auto descs = DeviceConvFwdXdl_bias_activation_add::
-                MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N(N,
-                                                                K,
-                                                                C,
-                                                                input_spatial_lengths,
-                                                                filter_spatial_lengths,
-                                                                output_spatial_lengths,
-                                                                conv_filter_strides,
-                                                                conv_filter_dilations,
-                                                                input_left_pads,
-                                                                input_right_pads);
+            const auto descs =
+                DeviceOp::MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N(N,
+                                                                          K,
+                                                                          C,
+                                                                          input_spatial_lengths,
+                                                                          filter_spatial_lengths,
+                                                                          output_spatial_lengths,
+                                                                          conv_filter_strides,
+                                                                          conv_filter_dilations,
+                                                                          input_left_pads,
+                                                                          input_right_pads);

             a_grid_desc_k0_m_k1_ = descs[I0];
             b_grid_desc_k0_n_k1_ = descs[I1];
...
@@ -466,7 +430,7 @@ struct DeviceConvFwdXdl_bias_activation_add<
     // Invoker
     struct Invoker : public BaseInvoker
     {
-        using Argument = DeviceConvFwdXdl_bias_activation_add::Argument;
+        using Argument = DeviceOp::Argument;

         float Run(const Argument& arg, int nrepeat = 1)
         {
...
@@ -513,18 +477,15 @@ struct DeviceConvFwdXdl_bias_activation_add<
                     GridwiseGemm,
                     ADataType, // TODO: distiguish A/B datatype
                     CDataType,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::AGridDesc_K0_M_K1>,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::BGridDesc_K0_N_K1>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::C0GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::C1GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::AGridDesc_K0_M_K1>,
+                    remove_reference_t<DeviceOp::BGridDesc_K0_N_K1>,
+                    remove_reference_t<DeviceOp::CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::C0GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::C1GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
                     InElementwiseOperation,
                     WeiElementwiseOperation,
                     OutElementwiseOperation,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::Block2CTileMap>,
+                    remove_reference_t<DeviceOp::Block2CTileMap>,
                     true>;

                 ave_time = launch_and_time_kernel(kernel,
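This Run body, and the one in the next hunk, instantiate the gridwise GEMM kernel twice, identical except for the trailing boolean template argument (true here, false below), and launch one of them at run time. A minimal sketch of that compile-both, dispatch-one pattern; the flag name and selection condition below are illustrative, not taken from this commit:

#include <cstdio>

// Sketch: two specializations are compiled; the host picks one when launching.
template <bool UseMainLoopVariant>
void run_kernel_sketch(int k0)
{
    if(UseMainLoopVariant)
        std::printf("variant<true>,  K0 = %d\n", k0);
    else
        std::printf("variant<false>, K0 = %d\n", k0);
}

void launch_sketch(int k0, int k0_per_block)
{
    const bool use_main_loop = (k0 / k0_per_block) > 1; // illustrative condition only

    if(use_main_loop)
        run_kernel_sketch<true>(k0);
    else
        run_kernel_sketch<false>(k0);
}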
...
@@ -553,18 +514,15 @@ struct DeviceConvFwdXdl_bias_activation_add<
                     GridwiseGemm,
                     ADataType, // TODO: distiguish A/B datatype
                     CDataType,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::AGridDesc_K0_M_K1>,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::BGridDesc_K0_N_K1>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::C0GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
-                    remove_reference_t<
-                        DeviceConvFwdXdl_bias_activation_add::C1GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::AGridDesc_K0_M_K1>,
+                    remove_reference_t<DeviceOp::BGridDesc_K0_N_K1>,
+                    remove_reference_t<DeviceOp::CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::C0GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
+                    remove_reference_t<DeviceOp::C1GridDesc_M0_N0_M1_N1_M2_M3_M4_N2>,
                     InElementwiseOperation,
                     WeiElementwiseOperation,
                     OutElementwiseOperation,
-                    remove_reference_t<DeviceConvFwdXdl_bias_activation_add::Block2CTileMap>,
+                    remove_reference_t<DeviceOp::Block2CTileMap>,
                     false>;

                 ave_time = launch_and_time_kernel(kernel,
...
@@ -591,7 +549,6 @@ struct DeviceConvFwdXdl_bias_activation_add<
             return ave_time;
         }

-        // polymorphic
         float Run(const BaseArgument* p_arg, int nrepeat = 1) override
         {
             return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
...
@@ -613,7 +570,6 @@ struct DeviceConvFwdXdl_bias_activation_add<
                           arg.N01_);
         }

-        // polymorphic
         bool IsSupportedArgument(const BaseArgument* p_arg) override
         {
             return IsSupportedArgument(*dynamic_cast<const Argument*>(p_arg));
...
@@ -661,6 +617,23 @@ struct DeviceConvFwdXdl_bias_activation_add<
     }

     static auto MakeInvoker() { return Invoker{}; }

+    std::string GetTypeString() const override
+    {
+        auto str = std::stringstream();
+
+        // clang-format off
+        str << "DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K"
+            << "<"
+            << BlockSize << ", "
+            << MPerBlock << ", "
+            << NPerBlock << ", "
+            << K0PerBlock
+            << ">";
+        // clang-format on
+
+        return str.str();
+    }
 };

 } // namespace device
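The newly added GetTypeString override mirrors the one already present in device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp and gives each tuning-parameter combination a printable identity. A minimal usage sketch (the helper name is illustrative):

#include <iostream>

// Sketch: log which tuning instance is about to run. `op` can be any CK device op
// that provides GetTypeString(); with the example configuration below the string
// would read roughly "DeviceConv2dFwdXdl_Bias_Activation_Add_...<256, 128, 256, 4>".
template <typename DeviceOpT>
void log_instance(const DeviceOpT& op)
{
    std::cout << "Running instance: " << op.GetTypeString() << std::endl;
}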
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
...
@@ -16,7 +16,7 @@ namespace ck {
 namespace tensor_operation {
 namespace device {

-// conv2d: in[n, hi, wi, c] * wei[k, y, x, c] = out[n, ho, wo, k]
+// out[N, Ho, Wo, K] = in[N, Hi, Wi, C] * wei[K, Y, X, C]
 template <typename InDataType,
           typename WeiDataType,
           typename OutDataType,
...
@@ -484,7 +484,6 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
             return ave_time;
         }

-        // polymorphic
         float Run(const BaseArgument* p_arg, int nrepeat = 1) override
         {
             return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
...
@@ -506,7 +505,6 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
                           arg.N01_);
         }

-        // polymorphic
         bool IsSupportedArgument(const BaseArgument* p_arg) override
         {
             return IsSupportedArgument(*dynamic_cast<const Argument*>(p_arg));
...
@@ -551,7 +549,6 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
     static auto MakeInvoker() { return Invoker{}; }

-    // polymorphic
     std::unique_ptr<BaseArgument>
     MakeArgumentPointer(const void* p_in_grid,
                         const void* p_wei_grid,
...
@@ -590,13 +587,11 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
                                           out_element_op);
     }

-    // polymorphic
     std::unique_ptr<BaseInvoker> MakeInvokerPointer() override
     {
         return std::make_unique<Invoker>(Invoker{});
     }

-    // polymorphic
     std::string GetTypeString() const override
     {
         auto str = std::stringstream();
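Both device ops expose the same two call paths: a typed one (MakeArgument/MakeInvoker) and a type-erased one (MakeArgumentPointer/MakeInvokerPointer plus the BaseArgument*/BaseInvoker overrides above), which is what callers holding only base-class pointers use. A minimal sketch of the type-erased path; it assumes the CK headers above are available and that BaseArgument/BaseInvoker live in ck::tensor_operation::device per device_base.hpp, and it leaves out MakeArgumentPointer because its full parameter list is not shown in this diff:

#include <memory>

// Sketch: run any CK device op through its polymorphic invoker, given an already
// constructed type-erased argument.
template <typename DeviceOpT>
float run_if_supported(DeviceOpT& op,
                       const ck::tensor_operation::device::BaseArgument* arg,
                       int nrepeat = 10)
{
    if(!op.IsSupportedArgument(arg))
        return -1.0f; // this tuning instance cannot handle the given problem

    std::unique_ptr<ck::tensor_operation::device::BaseInvoker> invoker = op.MakeInvokerPointer();

    return invoker->Run(arg, nrepeat); // returns average kernel time
}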
example/4_conv_xdl_bias_relu_add/conv_xdl_bias_relu_add.cpp
...
@@ -11,8 +11,7 @@
 #include "host_tensor_generator.hpp"
 #include "device_tensor.hpp"
 #include "tensor_layout.hpp"
-#include "example/4_conv_xdl_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add.hpp"
-#include "example/4_conv_xdl_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp"
+#include "device_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp"

 struct PassThrough
 {
...
@@ -170,13 +169,14 @@ using InElementOp = PassThrough;
 using WeiElementOp = PassThrough;
 using OutElementOp = BiasReluAdd;

-// clang-format off
-using DeviceConvFwdInstance =
-//################################################################| NDim| InData| WeiData| OutData| AccData| In| Wei| Out| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
-//################################################################| Spatial| Type| Type| Type| Type| Layout| Layout| Layout| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
-//################################################################| | | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
-//################################################################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
-    ck::tensor_operation::device::DeviceConvFwdXdl_bias_activation_add<2, InDataType, WeiDataType, OutDataType, AccDataType, InLayout, WeiLayout, OutLayout, InElementOp, WeiElementOp, OutElementOp, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
+using DeviceConvFwdInstance = ck::tensor_operation::device::
+    DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
+// clang-format off
+// | InData| WeiData| OutData| AccData| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| ABlockLds| BBlockLds|
+// | Type| Type| Type| Type| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| ThreadSlice| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| SrcDstVectorDim| DstScalar| AddExtraM| AddExtraN|
+// | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_N_K1| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| Lengths_K0_N_K1| Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerVector| | |
+// | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+        <InDataType, WeiDataType, OutDataType, AccDataType, InElementOp, WeiElementOp, OutElementOp, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<1, 2, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, S<1, 4, 8>, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 7, 1, true, true>;
 // clang-format on

 template <typename TIn,
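After the include and instance-alias change, the example drives the renamed device op exactly as before. A condensed sketch of that host-side flow; identifiers and the MakeArgument parameter list are abbreviated and illustrative rather than copied from the example, which also passes N, K, C, spatial lengths, filter strides, dilations and pads:

// Sketch of the example's host-side flow (illustrative names, not the exact call).
//
// auto conv     = DeviceConvFwdInstance{};
// auto invoker  = conv.MakeInvoker();
// auto argument = conv.MakeArgument(p_in, p_wei, p_out, p_bias, p_residual,
//                                   /* ...problem description... */,
//                                   InElementOp{}, WeiElementOp{}, OutElementOp{});
//
// if(!conv.IsSupportedArgument(argument))
//     return 1; // this instance does not support the requested convolution
//
// float ave_time = invoker.Run(argument, /*nrepeat=*/10);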