Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
adbda385
"script/hack_isa.sh" did not exist on "0979fb4af9d78b821334a8021e9b8bf0812d8ae6"
Commit
adbda385
authored
Dec 20, 2021
by
Chao Liu
Browse files
clean up
parent
2c7ccf67
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
79 additions
and
72 deletions
+79
-72
device_operation/include/device_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp
...ice_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp
+7
-8
device_operation/include/device_conv2d_fwd_xdl_bias_activation_nhwc_kyxc_nhwk.hpp
.../device_conv2d_fwd_xdl_bias_activation_nhwc_kyxc_nhwk.hpp
+7
-8
device_operation/include/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
..._fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
+5
-5
device_operation/include/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
...nclude/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp
+5
-6
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
...peration/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
+7
-8
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_p0.hpp
...n/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_p0.hpp
+7
-8
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0.hpp
...nclude/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0.hpp
+7
-8
device_operation/include/device_gemm_xdl.hpp
device_operation/include/device_gemm_xdl.hpp
+7
-8
example/4_conv2d_fwd_xdl_c_shuffle/README.md
example/4_conv2d_fwd_xdl_c_shuffle/README.md
+0
-0
example/4_conv2d_fwd_xdl_c_shuffle/conv2d_fwd_xdl_c_shuffle.cpp
...e/4_conv2d_fwd_xdl_c_shuffle/conv2d_fwd_xdl_c_shuffle.cpp
+3
-3
example/6_conv2d_fwd_xdl_c_shuffle_bias_relu_add/README.md
example/6_conv2d_fwd_xdl_c_shuffle_bias_relu_add/README.md
+0
-0
example/6_conv2d_fwd_xdl_c_shuffle_bias_relu_add/conv2d_fwd_xdl_c_shuffle_bias_relu_add.cpp
..._bias_relu_add/conv2d_fwd_xdl_c_shuffle_bias_relu_add.cpp
+2
-2
example/6_conv2d_fwd_xdl_c_shuffle_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add.hpp
...u_add/include/device_conv_fwd_xdl_bias_activation_add.hpp
+0
-0
example/CMakeLists.txt
example/CMakeLists.txt
+6
-6
profiler/CMakeLists.txt
profiler/CMakeLists.txt
+2
-0
profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp
profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp
+8
-2
profiler/include/profile_conv_fwd_impl.hpp
profiler/include/profile_conv_fwd_impl.hpp
+6
-0
No files found.
device_operation/include/device_conv2d_fwd_xdl_bias_activation_add_nhwc_kyxc_nhwk.hpp
View file @
adbda385
...
...
@@ -40,16 +40,16 @@ template <typename InDataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
...
...
@@ -256,19 +256,18 @@ struct DeviceConv2dFwdXdl_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Out
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder,
2
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
2
,
3
,
0
,
1
,
7
,
5
,
4
,
6
>
,
// CThreadTransferSrcDstAccessOrder,
7
,
// CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
using
CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2
=
decltype
(
GridwiseGemm
::
MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2
(
CGridDesc_M_N
{}));
...
...
device_operation/include/device_conv2d_fwd_xdl_bias_activation_nhwc_kyxc_nhwk.hpp
View file @
adbda385
...
...
@@ -40,16 +40,16 @@ template <typename InDataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceConv2dFwdXdl_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivation
<
InElementwiseOperation
,
WeiElementwiseOperation
,
...
...
@@ -250,19 +250,18 @@ struct DeviceConv2dFwdXdl_Bias_Activation_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder,
2
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
2
,
3
,
0
,
1
,
7
,
5
,
4
,
6
>
,
// CThreadTransferSrcDstAccessOrder,
7
,
// CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
using
CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2
=
decltype
(
GridwiseGemm
::
MakeCGridDescriptor_M0_N0_M1_N1_M2_M3_M4_N2
(
CGridDesc_M_N
{}));
...
...
device_operation/include/device_conv2d_fwd_xdl_
output
_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
→
device_operation/include/device_conv2d_fwd_xdl_
c
_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp
View file @
adbda385
#ifndef DEVICE_CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
#define DEVICE_CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
#ifndef DEVICE_CONV2D_FWD_XDL_
C
_SHUFFLE_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
#define DEVICE_CONV2D_FWD_XDL_
C
_SHUFFLE_BIAS_ACTIVATION_ADD_NHWC_KYXC_NHWK_HPP
#include <iostream>
#include <sstream>
...
...
@@ -54,13 +54,13 @@ template <
typename
CBlockTransferClusterLengths_MBlock_MRepeat_MWaveMPerXdl_NBlock_NRepeat_NWaveNPerXdl
,
index_t
CBlockTransferScalarPerVector_NWaveNPerXdl
>
struct
DeviceConv2dFwdXdl_
Output
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
DeviceConv2dFwdXdl_
C
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
using
DeviceOp
=
DeviceConv2dFwdXdl_
Output
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
;
DeviceConv2dFwdXdl_
C
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
;
using
ADataType
=
InDataType
;
using
BDataType
=
WeiDataType
;
...
...
@@ -642,7 +642,7 @@ struct
auto
str
=
std
::
stringstream
();
// clang-format off
str
<<
"DeviceConv2dFwdXdl_
Output
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K"
str
<<
"DeviceConv2dFwdXdl_
C
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K"
<<
"<"
<<
BlockSize
<<
", "
<<
MPerBlock
<<
", "
...
...
device_operation/include/device_conv2d_fwd_xdl_
output
_shuffle_nhwc_kyxc_nhwk.hpp
→
device_operation/include/device_conv2d_fwd_xdl_
c
_shuffle_nhwc_kyxc_nhwk.hpp
View file @
adbda385
#ifndef DEVICE_CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_NHWC_KYXC_NHWK_HPP
#define DEVICE_CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_NHWC_KYXC_NHWK_HPP
#ifndef DEVICE_CONV2D_FWD_XDL_
C
_SHUFFLE_NHWC_KYXC_NHWK_HPP
#define DEVICE_CONV2D_FWD_XDL_
C
_SHUFFLE_NHWC_KYXC_NHWK_HPP
#include <iostream>
#include <sstream>
...
...
@@ -52,11 +52,10 @@ template <
index_t
CShuffleNRepeatPerShuffle
,
typename
CBlockTransferClusterLengths_MBlock_MRepeat_MWaveMPerXdl_NBlock_NRepeat_NWaveNPerXdl
,
index_t
CBlockTransferScalarPerVector_NWaveNPerXdl
>
struct
DeviceConv2dFwdXdl_
Output
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
struct
DeviceConv2dFwdXdl_
C
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
using
DeviceOp
=
DeviceConv2dFwdXdl_Output_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
;
using
DeviceOp
=
DeviceConv2dFwdXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
;
using
ADataType
=
InDataType
;
using
BDataType
=
WeiDataType
;
...
...
@@ -580,7 +579,7 @@ struct DeviceConv2dFwdXdl_Output_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N
auto
str
=
std
::
stringstream
();
// clang-format off
str
<<
"DeviceConv2dFwdXdl_
Output
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K"
str
<<
"DeviceConv2dFwdXdl_
C
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K"
<<
"<"
<<
BlockSize
<<
", "
<<
MPerBlock
<<
", "
...
...
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp
View file @
adbda385
...
...
@@ -39,16 +39,16 @@ template <typename InDataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
...
...
@@ -235,19 +235,18 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder,
2
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
2
,
3
,
0
,
1
,
7
,
5
,
4
,
6
>
,
// CThreadTransferSrcDstAccessOrder,
7
,
// CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
// Argument
struct
Argument
:
public
BaseArgument
...
...
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_p0.hpp
View file @
adbda385
...
...
@@ -39,16 +39,16 @@ template <typename InDataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K_1x1_P0
:
public
DeviceConvFwd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
...
...
@@ -194,19 +194,18 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K_1x1_P0
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder,
2
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
2
,
3
,
0
,
1
,
7
,
5
,
4
,
6
>
,
// CThreadTransferSrcDstAccessOrder,
7
,
// CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
// Argument
struct
Argument
:
public
BaseArgument
...
...
device_operation/include/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0.hpp
View file @
adbda385
...
...
@@ -39,16 +39,16 @@ template <typename InDataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K_1x1_S1_P0
:
public
DeviceConvFwd
<
InElementwiseOperation
,
WeiElementwiseOperation
,
OutElementwiseOperation
>
{
...
...
@@ -171,19 +171,18 @@ struct DeviceConv2dFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K_1x1_S1
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder,
Sequence
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder,
2
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
2
,
3
,
0
,
1
,
7
,
5
,
4
,
6
>
,
// CThreadTransferSrcDstAccessOrder,
7
,
// CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
// Argument
struct
Argument
:
public
BaseArgument
...
...
device_operation/include/device_gemm_xdl.hpp
View file @
adbda385
...
...
@@ -41,16 +41,16 @@ template <typename ADataType,
ck
::
index_t
ABlockTransferSrcVectorDim
,
ck
::
index_t
ABlockTransferSrcScalarPerVector
,
ck
::
index_t
ABlockTransferDstScalarPerVector_K1
,
bool
ABlockLdsAddExtraM
,
typename
BBlockTransferThreadClusterLengths_K0_N_K1
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
ck
::
index_t
BBlockTransferSrcVectorDim
,
ck
::
index_t
BBlockTransferSrcScalarPerVector
,
ck
::
index_t
BBlockTransferDstScalarPerVector_K1
,
bool
BBlockLdsAddExtraN
,
ck
::
index_t
CThreadTransferSrcDstVectorDim
,
ck
::
index_t
CThreadTransferDstScalarPerVector
,
bool
ABlockLdsAddExtraM
,
bool
BBlockLdsAddExtraN
>
ck
::
index_t
CThreadTransferDstScalarPerVector
>
struct
DeviceGemmXdl
:
public
DeviceGemm
<
AElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
>
{
...
...
@@ -158,19 +158,18 @@ struct DeviceGemmXdl
ABlockTransferSrcScalarPerVector
,
ABlockTransferDstScalarPerVector_K1
,
false
,
// AThreadTransferSrcResetCoordinateAfterRun,
ABlockLdsAddExtraM
,
BBlockTransferThreadClusterLengths_K0_N_K1
,
BBlockTransferThreadClusterArrangeOrder
,
BBlockTransferSrcAccessOrder
,
BBlockTransferSrcVectorDim
,
BBlockTransferSrcScalarPerVector
,
BBlockTransferDstScalarPerVector_K1
,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
false
,
// BThreadTransferSrcResetCoordinateAfterRun,
BBlockLdsAddExtraN
,
Sequence
<
0
,
2
,
4
,
5
,
6
,
1
,
3
,
7
>
,
// CThreadTransferSrcDstAccessOrder,
CThreadTransferSrcDstVectorDim
,
CThreadTransferDstScalarPerVector
,
false
,
// CAccessOrderMRepeatNRepeat,
ABlockLdsAddExtraM
,
BBlockLdsAddExtraN
>
;
CThreadTransferDstScalarPerVector
>
;
// Argument
struct
Argument
:
public
BaseArgument
...
...
example/4_conv2d_fwd_xdl_
output
_shuffle/README.md
→
example/4_conv2d_fwd_xdl_
c
_shuffle/README.md
View file @
adbda385
File moved
example/4_conv2d_fwd_xdl_
output
_shuffle/conv2d_fwd_xdl_
output
_shuffle.cpp
→
example/4_conv2d_fwd_xdl_
c
_shuffle/conv2d_fwd_xdl_
c
_shuffle.cpp
View file @
adbda385
...
...
@@ -11,7 +11,7 @@
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "tensor_layout.hpp"
#include "device_operation/include/device_conv2d_fwd_xdl_
output
_shuffle_nhwc_kyxc_nhwk.hpp"
#include "device_operation/include/device_conv2d_fwd_xdl_
c
_shuffle_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation.hpp"
using
InDataType
=
ck
::
half_t
;
...
...
@@ -28,10 +28,10 @@ using OutLayout = ck::tensor_layout::convolution::NHWK;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
_v2
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
DeviceConvFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceConv2dFwdXdl_
Output
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
DeviceConv2dFwdXdl_
C
_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
// clang-format off
// | InData| WeiData| OutData| AccData| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
// | Type| Type| Type| Type| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MRepeate| NRepeate| _MBlock_MRepeat_MWaveMPerXdl| ScalarPerVector|
...
...
example/6_conv2d_fwd_xdl_
output
_shuffle_bias_relu_add/README.md
→
example/6_conv2d_fwd_xdl_
c
_shuffle_bias_relu_add/README.md
View file @
adbda385
File moved
example/6_conv2d_fwd_xdl_
output
_shuffle_bias_relu_add/conv2d_fwd_xdl_
output
_shuffle_bias_relu_add.cpp
→
example/6_conv2d_fwd_xdl_
c
_shuffle_bias_relu_add/conv2d_fwd_xdl_
c
_shuffle_bias_relu_add.cpp
View file @
adbda385
...
...
@@ -11,7 +11,7 @@
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
#include "tensor_layout.hpp"
#include "device_conv2d_fwd_xdl_
output
_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp"
#include "device_conv2d_fwd_xdl_
c
_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation.hpp"
using
InDataType
=
ck
::
half_t
;
...
...
@@ -32,7 +32,7 @@ using OutElementOp = ck::tensor_operation::element_wise::AddReluAdd_v2;
// clang-format off
using
DeviceConvFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceConv2dFwdXdl_
Output
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
DeviceConv2dFwdXdl_
C
_Shuffle_Bias_Activation_Add_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
// | InData| WeiData| OutData| AccData| In| Wei| Out| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
// | Type| Type| Type| Type| Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MRepeate| NRepeate| _MBlock_MRepeat_MWaveMPerXdl| ScalarPerVector|
// | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | PerShuffle| PerShuffle| _NBlock_NRepeat_NWaveNPerXdl| _NWaveNPerXdl|
...
...
example/6_conv2d_fwd_xdl_
output
_shuffle_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add.hpp
→
example/6_conv2d_fwd_xdl_
c
_shuffle_bias_relu_add/include/device_conv_fwd_xdl_bias_activation_add.hpp
View file @
adbda385
File moved
example/CMakeLists.txt
View file @
adbda385
...
...
@@ -14,23 +14,23 @@ include_directories(BEFORE
set
(
GEMM_XDL_SOURCE 1_gemm_xdl/gemm_xdl.cpp
)
set
(
GEMM_XDL_BIAS_RELU_ADD_SOURCE 3_gemm_xdl_bias_relu_add/gemm_xdl_bias_relu_add.cpp
)
set
(
CONV2D_FWD_XDL_SOURCE 4_conv2d_fwd_xdl/conv2d_fwd_xdl.cpp
)
set
(
CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_SOURCE 4_conv2d_fwd_xdl_
output
_shuffle/conv2d_fwd_xdl_
output
_shuffle.cpp
)
set
(
CONV2D_FWD_XDL_
C
_SHUFFLE_SOURCE 4_conv2d_fwd_xdl_
c
_shuffle/conv2d_fwd_xdl_
c
_shuffle.cpp
)
set
(
CONV2D_FWD_XDL_BIAS_RELU_SOURCE 5_conv2d_fwd_xdl_bias_relu/conv2d_fwd_xdl_bias_relu.cpp
)
set
(
CONV2D_FWD_XDL_BIAS_RELU_ADD_SOURCE 6_conv2d_fwd_xdl_bias_relu_add/conv2d_fwd_xdl_bias_relu_add.cpp
)
set
(
CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_BIAS_RELU_ADD_SOURCE 6_conv2d_fwd_xdl_
output
_shuffle_bias_relu_add/conv2d_fwd_xdl_
output
_shuffle_bias_relu_add.cpp
)
set
(
CONV2D_FWD_XDL_
C
_SHUFFLE_BIAS_RELU_ADD_SOURCE 6_conv2d_fwd_xdl_
c
_shuffle_bias_relu_add/conv2d_fwd_xdl_
c
_shuffle_bias_relu_add.cpp
)
add_executable
(
gemm_xdl
${
GEMM_XDL_SOURCE
}
)
add_executable
(
gemm_xdl_bias_relu_add
${
GEMM_XDL_BIAS_RELU_ADD_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl
${
CONV2D_FWD_XDL_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_
output
_shuffle
${
CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_
c
_shuffle
${
CONV2D_FWD_XDL_
C
_SHUFFLE_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_bias_relu
${
CONV2D_FWD_XDL_BIAS_RELU_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_bias_relu_add
${
CONV2D_FWD_XDL_BIAS_RELU_ADD_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_
output
_shuffle_bias_relu_add
${
CONV2D_FWD_XDL_
OUTPUT
_SHUFFLE_BIAS_RELU_ADD_SOURCE
}
)
add_executable
(
conv2d_fwd_xdl_
c
_shuffle_bias_relu_add
${
CONV2D_FWD_XDL_
C
_SHUFFLE_BIAS_RELU_ADD_SOURCE
}
)
target_link_libraries
(
gemm_xdl PRIVATE host_tensor
)
target_link_libraries
(
gemm_xdl_bias_relu_add PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_
output
_shuffle PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_
c
_shuffle PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_bias_relu PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_bias_relu_add PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_
output
_shuffle_bias_relu_add PRIVATE host_tensor
)
target_link_libraries
(
conv2d_fwd_xdl_
c
_shuffle_bias_relu_add PRIVATE host_tensor
)
profiler/CMakeLists.txt
View file @
adbda385
...
...
@@ -36,6 +36,7 @@ set(DEVICE_CONV2D_FWD_INSTANCE_SOURCE
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp;
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_p0_f16_instance.cpp;
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0_f16_instance.cpp;
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_instance SHARED
${
DEVICE_CONV2D_FWD_INSTANCE_SOURCE
}
)
...
...
@@ -58,6 +59,7 @@ install(TARGETS device_conv2d_fwd_bias_relu_instance LIBRARY DESTINATION lib)
# device_conv2d_fwd_bias_relu_add_instance
set
(
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
${
PROJECT_SOURCE_DIR
}
/device_operation/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp;
)
add_library
(
device_conv2d_fwd_bias_relu_add_instance SHARED
${
DEVICE_CONV2D_FWD_BIAS_RELU_ADD_INSTANCE_SOURCE
}
)
...
...
profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp
View file @
adbda385
...
...
@@ -19,7 +19,10 @@ using DeviceConvFwdBiasReluAddPtr =
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
AddReluAdd
>
;
void
add_device_conv2d_fwd_bias_relu_add_xdl_nhwc_kyxc_nhwk_fp16_instances
(
void
add_device_conv2d_fwd_bias_relu_xdl_nhwc_kyxc_nhwk_fp16_instances
(
std
::
vector
<
DeviceConvFwdBiasReluAddPtr
>&
);
void
add_device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instances
(
std
::
vector
<
DeviceConvFwdBiasReluAddPtr
>&
);
}
// namespace device_conv2d_fwd_bias_activation_add_instance
...
...
@@ -208,7 +211,10 @@ void profile_conv_fwd_bias_relu_add_impl(int do_verification,
ck
::
is_same_v
<
ck
::
remove_cv_t
<
OutDataType
>
,
ck
::
half_t
>
)
{
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_bias_activation_add_instance
::
add_device_conv2d_fwd_bias_relu_add_xdl_nhwc_kyxc_nhwk_fp16_instances
(
op_ptrs
);
add_device_conv2d_fwd_bias_relu_xdl_nhwc_kyxc_nhwk_fp16_instances
(
op_ptrs
);
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_bias_activation_add_instance
::
add_device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instances
(
op_ptrs
);
}
if
(
op_ptrs
.
size
()
<=
0
)
...
...
profiler/include/profile_conv_fwd_impl.hpp
View file @
adbda385
...
...
@@ -28,6 +28,9 @@ void add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_p0_fp16_instances(
void
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0_fp16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
void
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_fp16_instances
(
std
::
vector
<
DeviceConvFwdNoOpPtr
>&
);
}
// namespace device_conv2d_fwd_instance
}
// namespace device
}
// namespace tensor_operation
...
...
@@ -154,6 +157,9 @@ void profile_conv_fwd_impl(int do_verification,
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_1x1_s1_p0_fp16_instances
(
conv_ptrs
);
ck
::
tensor_operation
::
device
::
device_conv2d_fwd_instance
::
add_device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_fp16_instances
(
conv_ptrs
);
}
if
(
conv_ptrs
.
size
()
<=
0
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment