Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
10cabc2d
Commit
10cabc2d
authored
Mar 30, 2023
by
Adam Osewski
Browse files
Merge remote-tracking branch 'origin/develop' into aosewski/ggemm_splitk
parents
baf68688
091570f5
Changes
47
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
248 additions
and
2 deletions
+248
-2
library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp
...rouped_convolution_bias_forward_perlayer_quantization.hpp
+92
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
...pu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
+9
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
..._conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
+36
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
...ce_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
+37
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
...conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
+35
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
...e_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
+37
-0
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+2
-2
No files found.
library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp
View file @
10cabc2d
...
...
@@ -49,6 +49,21 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
Add_Activation_Mul_Clamp
<
Relu
>>>>&
instances
);
void
add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
2
,
GNHWC
,
GKYXC
,
GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul_Activation_Mul_Clamp
<
TanH
>>>>&
instances
);
void
add_device_conv2d_xdl_bias_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
2
,
...
...
@@ -80,6 +95,22 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
Add_Activation_Mul_Clamp
<
Relu
>>>>&
instances
);
void
add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
2
,
GNHWC
,
GKYXC
,
GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul_Activation_Mul_Clamp
<
TanH
>>>>&
instances
);
// piecewise activation function
template
<
ck
::
index_t
NumDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
...
...
@@ -145,6 +176,67 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
}
};
// non-piecewise activation function
template
<
ck
::
index_t
NumDimSpatial
,
typename
InLayout
,
typename
WeiLayout
,
typename
DsLayout
,
typename
OutLayout
,
typename
InDataType
,
typename
WeiDataType
,
typename
DsDataType
,
typename
OutDataType
,
typename
Activation
>
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
DsLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
DsDataType
,
OutDataType
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
Add_Mul_Activation_Mul_Clamp
<
Activation
>>>
{
using
DeviceOp
=
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
InLayout
,
WeiLayout
,
DsLayout
,
OutLayout
,
InDataType
,
WeiDataType
,
DsDataType
,
OutDataType
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
Add_Mul_Activation_Mul_Clamp
<
Activation
>>
;
static
auto
GetInstances
()
{
std
::
vector
<
std
::
unique_ptr
<
DeviceOp
>>
op_ptrs
;
if
constexpr
(
NumDimSpatial
==
2
&&
is_same_v
<
InLayout
,
GNHWC
>
&&
is_same_v
<
WeiLayout
,
GKYXC
>
&&
is_same_v
<
DsLayout
,
GK_Tuple
>
&&
is_same_v
<
OutLayout
,
GNHWK
>
)
{
if
constexpr
(
is_same_v
<
InDataType
,
int8_t
>
&&
is_same_v
<
WeiDataType
,
int8_t
>
&&
is_same_v
<
DsDataType
,
I32_Tuple
>
&&
is_same_v
<
OutDataType
,
int8_t
>
)
{
if
constexpr
(
is_same_v
<
Activation
,
TanH
>
)
{
add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances
(
op_ptrs
);
add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances
(
op_ptrs
);
}
}
}
return
op_ptrs
;
}
};
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp
View file @
10cabc2d
...
...
@@ -25,6 +25,7 @@ using GNHWK = ck::tensor_layout::convolution::GNHWK;
using
GK
=
ck
::
tensor_layout
::
convolution
::
G_K
;
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
Relu
=
ck
::
tensor_operation
::
element_wise
::
Relu
;
using
TanH
=
ck
::
tensor_operation
::
element_wise
::
TanH
;
using
GK_Tuple
=
ck
::
Tuple
<
GK
>
;
using
GK_GK_Tuple
=
ck
::
Tuple
<
GK
,
GK
>
;
...
...
@@ -32,17 +33,25 @@ using I32_Tuple = ck::Tuple<int32_t>;
using
F32_Tuple
=
ck
::
Tuple
<
float
>
;
using
I32_F32_Tuple
=
ck
::
Tuple
<
int32_t
,
float
>
;
// perlayer
using
Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Activation_Mul_Clamp
<
PassThrough
>
;
using
Relu_Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Activation_Mul_Clamp
<
Relu
>
;
// bias + perlayer
using
Add_Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Activation_Mul_Clamp
<
PassThrough
>
;
using
Add_Relu_Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Activation_Mul_Clamp
<
Relu
>
;
using
Add_Mul_TanH_Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Mul_Activation_Mul_Clamp
<
TanH
>
;
// perchannel
using
Mul2_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Activation_Mul2_Clamp
<
PassThrough
>
;
using
Relu_Mul2_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Activation_Mul2_Clamp
<
Relu
>
;
// bias + perchannel
using
Add_Mul2_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Activation_Mul2_Clamp
<
PassThrough
>
;
using
Add_Relu_Mul2_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Activation_Mul2_Clamp
<
Relu
>
;
using
Add_Mul2_TanH_Mul_Clamp
=
ck
::
tensor_operation
::
element_wise
::
Add_Mul2_Activation_Mul_Clamp
<
TanH
>
;
static
constexpr
ck
::
index_t
NDimSpatial
=
2
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp
View file @
10cabc2d
...
...
@@ -76,6 +76,42 @@ void add_device_conv2d_dl_bias_relu_perchannel_quantization_int8_instances(
ConvFwd1x1S1P0
,
4
>
{});
}
void
add_device_conv2d_dl_bias_tanh_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
GNHWC
,
GKYXC
,
GK_GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul2_TanH_Mul_Clamp
>>>&
instances
)
{
// dl
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwdDefault
,
4
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
4
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
4
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp
View file @
10cabc2d
...
...
@@ -76,6 +76,43 @@ void add_device_conv2d_dl_bias_relu_perlayer_quantization_int8_instances(
ConvFwd1x1S1P0
,
4
>
{});
}
void
add_device_conv2d_dl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
GNHWC
,
GKYXC
,
GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwdDefault
,
4
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
4
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_dl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
4
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp
View file @
10cabc2d
...
...
@@ -74,6 +74,41 @@ void add_device_conv2d_xdl_bias_relu_perchannel_quantization_int8_instances(
ConvFwd1x1S1P0
,
8
>
{});
}
void
add_device_conv2d_xdl_bias_tanh_perchannel_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
GNHWC
,
GKYXC
,
GK_GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_F32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul2_TanH_Mul_Clamp
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwdDefault
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
Add_Mul2_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
8
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp
View file @
10cabc2d
...
...
@@ -76,6 +76,43 @@ void add_device_conv2d_xdl_bias_relu_perlayer_quantization_int8_instances(
ConvFwd1x1S1P0
,
8
>
{});
}
void
add_device_conv2d_xdl_bias_tanh_perlayer_quantization_int8_instances
(
std
::
vector
<
std
::
unique_ptr
<
DeviceGroupedConvFwdMultipleD
<
NDimSpatial
,
GNHWC
,
GKYXC
,
GK_Tuple
,
GNHWK
,
int8_t
,
int8_t
,
I32_Tuple
,
int8_t
,
PassThrough
,
PassThrough
,
Add_Mul_TanH_Mul_Clamp
>>>&
instances
)
{
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwdDefault
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_TanH_Mul_Clamp
,
ConvFwd1x1S1P0
,
8
>
{});
}
}
// namespace instance
}
// namespace device
}
// namespace tensor_operation
...
...
script/cmake-ck-dev.sh
View file @
10cabc2d
...
...
@@ -8,8 +8,8 @@ MY_PROJECT_SOURCE=$1
cmake
\
-D
CMAKE_PREFIX_PATH
=
/opt/rocm
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_FLAGS
=
"-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE
-mcumode
\
-
mno-wavefrontsize64 -Wno-gnu-line-marker -save-temps=
$PWD
"
\
-D
CMAKE_CXX_FLAGS
=
"-std=c++17 -O3 -ftemplate-backtrace-limit=0
-fPIE
-Wno-gnu-line-marker
\
-
save-temps=
$PWD
"
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
BUILD_DEV
=
ON
\
-D
GPU_TARGETS
=
"gfx908;gfx90a"
\
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment