gaoqiong / composable_kernel_ROCM · Commits

Commit d4e98e5b
authored Jul 15, 2022 by carlushuang

    add 2 new instances

Parent: 71e5eab1
Showing 2 changed files with 69 additions and 65 deletions (+69 −65):

    +3  −1   library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
    +66 −64  library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_direct_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp

@@ -64,7 +64,9 @@ void add_device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk(
         std::make_tuple(
             // clang-format off
             DeviceConvNDDirectFwdAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, PT, PT, PT, ConvFwdDefault, 2, 6, 16, false, false, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MKN}),
-            DeviceConvNDDirectFwdAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, PT, PT, PT, ConvFwdDefault, 2, 6, 16, false, false, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MNK})
+            DeviceConvNDDirectFwdAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, PT, PT, PT, ConvFwdDefault, 2, 6, 16, false, false, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MNK}),
+            DeviceConvNDDirectFwdAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, PT, PT, PT, ConvFwdDefault, 2, 4, 24, false, false, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MKN}),
+            DeviceConvNDDirectFwdAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, PT, PT, PT, ConvFwdDefault, 2, 4, 24, false, false, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MNK})
             // clang-format on
         ));
 }
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_direct_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp

The diff marks essentially the entire file as changed (+66 −64). Apart from formatting churn, the substantive change mirrors the first file: a trailing comma is added to the last existing instance and two new instances parameterized with <..., 2, 4, 24, ...> are registered alongside the existing <..., 2, 6, 16, ...> ones. The file after this commit reads:
#include <stdlib.h>
#include <utility>
#include <memory>

#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_direct_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace cpu {
namespace device {
namespace device_conv2d_fwd_bias_activation_add_avx2_instance {

using InType  = float;
using WeiType = float;
using OutType = float;
using AccType = float;

using InLayout  = ck::tensor_layout::gemm::RowMajor;    // NHWC
using WeiLayout = ck::tensor_layout::gemm::ColumnMajor; // KYXCK8

static constexpr bool NonTemporalStore = false;

using PT         = ck::tensor_operation::cpu::element_wise::PassThrough;
using AddReluAdd = ck::tensor_operation::cpu::element_wise::AddReluAdd;
using AddRelu    = ck::tensor_operation::cpu::element_wise::AddRelu;
using Add        = ck::tensor_operation::cpu::element_wise::Add;
using AddAddRelu = ck::tensor_operation::cpu::element_wise::AddAddRelu;

static constexpr auto ConvFwdDefault =
    ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Default;

static constexpr auto ConvFwd1x1P0 =
    ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Filter1x1Pad0;

static constexpr auto ConvFwd1x1S1P0 =
    ck::tensor_operation::cpu::device::ConvolutionForwardSpecialization_t::Filter1x1Stride1Pad0;

static constexpr auto DefaultGemmKLoop =
    ck::tensor_operation::cpu::device::ConvolutionForwardGemmKSpecialization_t::DefaultGemmKLoop;

static constexpr auto GemmKLoopOverC =
    ck::tensor_operation::cpu::device::ConvolutionForwardGemmKSpecialization_t::NHWC_GemmKLoopOverC;

static constexpr auto LoopOver_MNK = ck::tensor_operation::cpu::device::LoopOver_MNK;
static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver_MKN;

void add_device_conv2d_direct_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk(
    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
    ck::tensor_operation::device::instance::add_device_operation_instances(
        instances,
        std::make_tuple(
            // clang-format off
            DeviceConvNDDirectFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, float, float, PT, PT, AddReluAdd, ConvFwdDefault, 2, 6, 16, false, false, false, true, true, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MKN}),
            DeviceConvNDDirectFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, float, float, PT, PT, AddReluAdd, ConvFwdDefault, 2, 6, 16, false, false, false, true, true, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MNK}),
            DeviceConvNDDirectFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, float, float, PT, PT, AddReluAdd, ConvFwdDefault, 2, 4, 24, false, false, false, true, true, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MKN}),
            DeviceConvNDDirectFwdBiasActivationAddAvx2_Input_N_Hi_Wi_C_Weight_K_Y_X_C_K8_Output_N_Ho_Wo_K<float, float, float, float, float, PT, PT, AddReluAdd, ConvFwdDefault, 2, 4, 24, false, false, false, true, true, false>({0, 0, 0, DefaultGemmKLoop, LoopOver_MNK})
            // clang-format on
        ));
}

} // namespace device_conv2d_fwd_bias_activation_add_avx2_instance
} // namespace device
} // namespace cpu
} // namespace tensor_operation
} // namespace ck
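
For orientation, the factories touched by this commit only populate a vector of type-erased operator pointers; a caller then selects among the registered instances. Below is a minimal consumption sketch, not part of this commit: the namespace, function name, and vector element type are taken verbatim from the file above, while the header that would provide the declarations is not shown in this diff and is assumed to be already included.

// Sketch only, not from this commit: populate and inspect the instance list
// registered by the file above. Any required #include is assumed to be in place.
#include <cstdio>
#include <vector>

void report_bias_activation_add_instances()
{
    using namespace ck::tensor_operation::cpu::device;
    using namespace ck::tensor_operation::cpu::device::
        device_conv2d_fwd_bias_activation_add_avx2_instance;

    std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>> instances;
    add_device_conv2d_direct_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk(instances);

    // After this commit the tuple registers two extra instances
    // (the <..., 2, 4, 24, ...> variants), so the vector is two entries longer.
    std::printf("avx2 conv2d fwd + bias + relu + add instances: %zu\n", instances.size());
}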