Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f8b551da
Commit
f8b551da
authored
Jun 14, 2022
by
carlushuang
Browse files
add bias_relu, bias fusion
parent
bfa4c686
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1672 additions
and
232 deletions
+1672
-232
example/cpu_02_conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
...conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
+247
-24
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
...ce_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
+47
-13
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
..._convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
+48
-14
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
...ce_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
+47
-13
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
.../threadwise_tensor_slice_transfer_avx2_specialization.hpp
+623
-0
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
...nv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
+220
-56
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
...2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
+220
-56
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
...nv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
+220
-56
No files found.
example/cpu_02_conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
View file @
f8b551da
This diff is collapsed.
Click to expand it.
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
View file @
f8b551da
...
@@ -37,6 +37,8 @@ template <typename InDataType,
...
@@ -37,6 +37,8 @@ template <typename InDataType,
bool
UseALocalBuffer
,
bool
UseALocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseCLocalBuffer
,
bool
UseCLocalBuffer
,
bool
FuseBias
,
bool
FuseAdd
,
bool
BiasAlongGemmM
>
bool
BiasAlongGemmM
>
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
...
@@ -607,8 +609,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Outpu
...
@@ -607,8 +609,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Outpu
!
UseBLocalBuffer
,
!
UseBLocalBuffer
,
ConvForwardSpecialization
>
;
ConvForwardSpecialization
>
;
using
CThreadwiseCopy
=
static
constexpr
auto
GetCThreadwiseCopy
()
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
{
constexpr
ck
::
index_t
C_nDim
=
CGridDesc
::
GetNumOfDimension
();
if
constexpr
(
FuseBias
&&
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
CDataType
,
CDataType
,
C0DataType
,
C0DataType
,
C1DataType
,
C1DataType
,
...
@@ -619,7 +626,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Outpu
...
@@ -619,7 +626,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Outpu
decltype
(
GetOutputBlockDescriptor
()),
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
OutElementwiseOperation
,
!
UseCLocalBuffer
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
;
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
else
if
constexpr
(
FuseBias
&&
!
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_MxN
<
CDataType
,
C0DataType
,
C1DataType
,
CDataType
,
CGridDesc
,
C0GridDesc
,
C1GridDesc
,
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
}
using
CThreadwiseCopy
=
decltype
(
GetCThreadwiseCopy
());
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
ADataType
,
// InDataType,
ADataType
,
// InDataType,
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
View file @
f8b551da
...
@@ -37,6 +37,8 @@ template <typename InDataType,
...
@@ -37,6 +37,8 @@ template <typename InDataType,
bool
UseALocalBuffer
,
bool
UseALocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseCLocalBuffer
,
bool
UseCLocalBuffer
,
bool
FuseBias
,
bool
FuseAdd
,
bool
BiasAlongGemmM
>
bool
BiasAlongGemmM
>
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
...
@@ -584,8 +586,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Ou
...
@@ -584,8 +586,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Ou
!
UseBLocalBuffer
,
!
UseBLocalBuffer
,
ConvForwardSpecialization
>
;
ConvForwardSpecialization
>
;
using
CThreadwiseCopy
=
static
constexpr
auto
GetCThreadwiseCopy
()
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
{
constexpr
ck
::
index_t
C_nDim
=
CGridDesc
::
GetNumOfDimension
();
if
constexpr
(
FuseBias
&&
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
CDataType
,
CDataType
,
C0DataType
,
C0DataType
,
C1DataType
,
C1DataType
,
...
@@ -596,7 +603,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Ou
...
@@ -596,7 +603,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Ou
decltype
(
GetOutputBlockDescriptor
()),
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
OutElementwiseOperation
,
!
UseCLocalBuffer
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
;
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
else
if
constexpr
(
FuseBias
&&
!
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_MxN
<
CDataType
,
C0DataType
,
C1DataType
,
CDataType
,
CGridDesc
,
C0GridDesc
,
C1GridDesc
,
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
}
using
CThreadwiseCopy
=
decltype
(
GetCThreadwiseCopy
());
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
ADataType
,
// InDataType,
ADataType
,
// InDataType,
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
View file @
f8b551da
...
@@ -36,6 +36,8 @@ template <typename InDataType,
...
@@ -36,6 +36,8 @@ template <typename InDataType,
bool
UseALocalBuffer
,
bool
UseALocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseBLocalBuffer
,
bool
UseCLocalBuffer
,
bool
UseCLocalBuffer
,
bool
FuseBias
,
bool
FuseAdd
,
bool
BiasAlongGemmM
>
bool
BiasAlongGemmM
>
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Output_N_Ho_Wo_K
struct
DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Output_N_Ho_Wo_K
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
:
public
DeviceConvFwdBiasActivationAdd
<
InElementwiseOperation
,
...
@@ -580,8 +582,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Outpu
...
@@ -580,8 +582,13 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Outpu
!
UseBLocalBuffer
,
!
UseBLocalBuffer
,
ConvForwardSpecialization
>
;
ConvForwardSpecialization
>
;
using
CThreadwiseCopy
=
static
constexpr
auto
GetCThreadwiseCopy
()
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
{
constexpr
ck
::
index_t
C_nDim
=
CGridDesc
::
GetNumOfDimension
();
if
constexpr
(
FuseBias
&&
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_Residual_MxN
<
CDataType
,
CDataType
,
C0DataType
,
C0DataType
,
C1DataType
,
C1DataType
,
...
@@ -592,7 +599,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Outpu
...
@@ -592,7 +599,34 @@ struct DeviceConvNDFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_Y_X_C_K_Outpu
decltype
(
GetOutputBlockDescriptor
()),
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
OutElementwiseOperation
,
!
UseCLocalBuffer
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
;
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
else
if
constexpr
(
FuseBias
&&
!
FuseAdd
)
{
return
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2Specialization_MatC_Store_Bias_MxN
<
CDataType
,
C0DataType
,
C1DataType
,
CDataType
,
CGridDesc
,
C0GridDesc
,
C1GridDesc
,
decltype
(
GetOutputBlockDescriptor
()),
OutElementwiseOperation
,
!
UseCLocalBuffer
,
BiasAlongGemmM
>
(
CGridDesc
{},
ck
::
make_zero_multi_index
<
C_nDim
>
(),
GetOutputBlockDescriptor
(),
ck
::
make_zero_multi_index
<
C_nDim
>
(),
OutElementwiseOperation
{});
}
}
using
CThreadwiseCopy
=
decltype
(
GetCThreadwiseCopy
());
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
using
GridwiseGemm
=
ck
::
cpu
::
GridwiseGemmBiasActivationAddAvx2_MxN
<
ADataType
,
// InDataType,
ADataType
,
// InDataType,
...
...
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
View file @
f8b551da
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
View file @
f8b551da
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
View file @
f8b551da
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
View file @
f8b551da
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment