Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
05c484e2
Commit
05c484e2
authored
Jul 20, 2022
by
Chao Liu
Browse files
adding group conv
parent
3474c777
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
259 additions
and
24 deletions
+259
-24
example/06_conv2d_fwd_bias_relu/conv2d_fwd_bias_relu_xdl_fp16.cpp
...06_conv2d_fwd_bias_relu/conv2d_fwd_bias_relu_xdl_fp16.cpp
+1
-1
example/28_group_convnd_fwd_bias_relu/CMakeLists.txt
example/28_group_convnd_fwd_bias_relu/CMakeLists.txt
+1
-0
example/28_group_convnd_fwd_bias_relu/group_convnd_fwd_bias_relu_xdl_fp16.cpp
...vnd_fwd_bias_relu/group_convnd_fwd_bias_relu_xdl_fp16.cpp
+170
-0
example/CMakeLists.txt
example/CMakeLists.txt
+1
-0
include/ck/tensor_operation/gpu/device/device_conv_fwd_multiple_d.hpp
...ensor_operation/gpu/device/device_conv_fwd_multiple_d.hpp
+61
-0
include/ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
...device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
+25
-23
No files found.
example/06_conv2d_fwd_bias_relu/conv2d_fwd_bias_relu_xdl_fp16.cpp
View file @
05c484e2
...
@@ -93,7 +93,7 @@ void print_helper_msg()
...
@@ -93,7 +93,7 @@ void print_helper_msg()
<<
"Following arguments (depending on number of spatial dims):
\n
"
<<
"Following arguments (depending on number of spatial dims):
\n
"
<<
" N, K, C,
\n
"
<<
" N, K, C,
\n
"
<<
" <filter spatial dimensions>, (ie Y, X for 2D)
\n
"
<<
" <filter spatial dimensions>, (ie Y, X for 2D)
\n
"
<<
" <in
_n_hi_wi_c
image spatial dimensions>, (ie Hi, Wi for 2D)
\n
"
<<
" <in
put
image spatial dimensions>, (ie Hi, Wi for 2D)
\n
"
<<
" <strides>, (ie Sy, Sx for 2D)
\n
"
<<
" <strides>, (ie Sy, Sx for 2D)
\n
"
<<
" <dilations>, (ie Dy, Dx for 2D)
\n
"
<<
" <dilations>, (ie Dy, Dx for 2D)
\n
"
<<
" <left padding>, (ie LeftPy, LeftPx for 2D)
\n
"
<<
" <left padding>, (ie LeftPy, LeftPx for 2D)
\n
"
...
...
example/28_group_convnd_fwd_bias_relu/CMakeLists.txt
0 → 100644
View file @
05c484e2
add_example_executable
(
example_group_convnd_fwd_bias_relu_xdl_fp16 group_convnd_fwd_bias_relu_xdl_fp16.cpp
)
example/28_group_convnd_fwd_bias_relu/group_convnd_fwd_bias_relu_xdl_fp16.cpp
0 → 100644
View file @
05c484e2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "../09_convnd_fwd/convnd_fwd_common.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_nwc_kxc_nwk_xdl.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp"
using
InDataType
=
ck
::
half_t
;
using
WeiDataType
=
ck
::
half_t
;
using
OutDataType
=
ck
::
half_t
;
using
AccDataType
=
float
;
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
UnaryConvert
;
using
CShuffleDataType
=
ck
::
half_t
;
static
constexpr
auto
ConvSpec
=
ck
::
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvNDFwdInstance
=
ck
::
tensor_operation
::
device
::
DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
<
NDimSpatial
,
//
InDataType
,
//
WeiDataType
,
//
AccDataType
,
//
CShuffleDataType
,
//
ck
::
Tuple
<>
,
//
OutDataType
,
//
InElementOp
,
// Input Elementwise Operation
WeiElementOp
,
// Weights Elementwise Operation
OutElementOp
,
// Output Elementwise Operation
ConvSpec
,
// ConvForwardSpecialization
GemmSpec
,
// GemmSpecialization
1
,
//
256
,
// BlockSize
128
,
// MPerBlock
256
,
// NPerBlock
32
,
// KPerBlock
8
,
// K1
32
,
// MPerXdl
32
,
// NPerXdl
2
,
// MXdlPerWave
4
,
// NXdlPerWave
S
<
4
,
64
,
1
>
,
// ABlockTransferThreadClusterLengths_K0_M_K1
S
<
1
,
0
,
2
>
,
// ABlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// ABlockTransferSrcAccessOrder
2
,
// ABlockTransferSrcVectorDim
8
,
// ABlockTransferSrcScalarPerVector
8
,
// ABlockTransferDstScalarPerVector_K1
1
,
// ABlockLdsExtraM
S
<
4
,
64
,
1
>
,
// BBlockTransferThreadClusterLengths_K0_N_K1
S
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder
2
,
// BBlockTransferSrcVectorDim
8
,
// BBlockTransferSrcScalarPerVector
8
,
// BBlockTransferDstScalarPerVector_K1
1
,
// BBlockLdsExtraN
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
int
main
(
int
argc
,
char
*
argv
[])
{
print_helper_msg
();
bool
do_verification
=
true
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
int
num_dim_spatial
=
2
;
ck
::
utils
::
conv
::
ConvParam
params
{
2
,
128
,
256
,
192
,
{
3
,
3
},
{
71
,
71
},
{
2
,
2
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}};
if
(
argc
==
1
)
{
// use default
}
else
if
(
argc
==
4
)
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
}
else
{
do_verification
=
std
::
stoi
(
argv
[
1
]);
init_method
=
std
::
stoi
(
argv
[
2
]);
time_kernel
=
std
::
stoi
(
argv
[
3
]);
num_dim_spatial
=
std
::
stoi
(
argv
[
4
]);
params
=
parse_conv_params
(
num_dim_spatial
,
5
,
argv
);
}
const
auto
in_element_op
=
InElementOp
{};
const
auto
wei_element_op
=
WeiElementOp
{};
const
auto
out_element_op
=
OutElementOp
{};
if
(
num_dim_spatial
==
1
)
{
return
run_conv_fwd
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
DeviceConvNDFwdInstance
<
1
>>
(
do_verification
,
init_method
,
time_kernel
,
params
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
else
if
(
num_dim_spatial
==
2
)
{
return
run_conv_fwd
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
DeviceConvNDFwdInstance
<
2
>>
(
do_verification
,
init_method
,
time_kernel
,
params
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
else
if
(
num_dim_spatial
==
3
)
{
return
run_conv_fwd
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
InDataType
,
WeiDataType
,
OutDataType
,
InElementOp
,
WeiElementOp
,
OutElementOp
,
DeviceConvNDFwdInstance
<
3
>>
(
do_verification
,
init_method
,
time_kernel
,
params
,
in_element_op
,
wei_element_op
,
out_element_op
);
}
return
0
;
}
example/CMakeLists.txt
View file @
05c484e2
...
@@ -44,3 +44,4 @@ add_subdirectory(24_batched_gemm_c_permute)
...
@@ -44,3 +44,4 @@ add_subdirectory(24_batched_gemm_c_permute)
add_subdirectory
(
25_gemm_bias_c_permute
)
add_subdirectory
(
25_gemm_bias_c_permute
)
add_subdirectory
(
26_contraction
)
add_subdirectory
(
26_contraction
)
add_subdirectory
(
27_layernorm
)
add_subdirectory
(
27_layernorm
)
add_subdirectory
(
28_group_convnd_fwd_bias_relu
)
include/ck/tensor_operation/gpu/device/device_conv_fwd_multiple_d.hpp
0 → 100644
View file @
05c484e2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
// GEMM:
// input : input image A[N, Hi, Wi, C],
// input : weight B[K, Y, X, C],
// input : D0[N, Ho, Wo, K], D1[N, Ho, Wo, K], ...
// output : output image E[N, Ho, Wo, K]
// C = a_op(A) * b_op(B)
// E = cde_op(C, D0, D1, ...)
// Assume:
// D0, D1, ... and E have the same layout
template
<
ck
::
index_t
NDimSpatial
,
typename
ALayout
,
typename
BLayout
,
typename
DELayout
,
typename
ADataType
,
typename
BDataType
,
typename
DsDataType
,
typename
EDataType
,
typename
AElementwiseOperation
,
typename
BElementwiseOperation
,
typename
CDEElementwiseOperation
>
struct
DeviceConvFwdMultipleD
:
public
BaseOperator
{
static
constexpr
index_t
NumDTensor
=
DsDataType
::
Size
();
virtual
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
ADataType
*
p_a
,
const
BDataType
*
p_b
,
EDataType
*
p_e
,
ck
::
index_t
N
,
ck
::
index_t
K
,
ck
::
index_t
C
,
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
CDEElementwiseOperation
cde_element_op
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
}
// namespace device
}
// namespace tensor_operation
}
// namespace ck
include/ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
View file @
05c484e2
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_conv_fwd.hpp"
#include "ck/tensor_operation/gpu/device/device_conv_fwd
_multiple_d
.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
...
@@ -151,25 +151,27 @@ template <index_t NDimSpatial,
...
@@ -151,25 +151,27 @@ template <index_t NDimSpatial,
index_t
CDEBlockTransferScalarPerVector_NPerBlock
,
index_t
CDEBlockTransferScalarPerVector_NPerBlock
,
LoopScheduler
LoopSched
=
make_default_loop_scheduler
()>
LoopScheduler
LoopSched
=
make_default_loop_scheduler
()>
struct
DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
struct
DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
:
public
DeviceConvFwd
<
NDimSpatial
,
:
public
DeviceConvFwdMultipleD
<
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
NDimSpatial
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ADataType
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
BDataType
,
ADataType
,
EDataType
,
BDataType
,
AElementwiseOperation
,
DsDataType
,
BElementwiseOperation
,
EDataType
,
CDEElementwiseOperation
>
AElementwiseOperation
,
BElementwiseOperation
,
CDEElementwiseOperation
>
{
{
using
DeviceOp
=
DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
;
using
DeviceOp
=
DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
;
...
@@ -1130,9 +1132,9 @@ struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
...
@@ -1130,9 +1132,9 @@ struct DeviceConvNdFwdMultipleD_NwcKxcNwk_Xdl_CShuffle
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
std
::
unique_ptr
<
BaseArgument
>
std
::
unique_ptr
<
BaseArgument
>
MakeArgumentPointer
(
const
void
*
p_in_grid
,
MakeArgumentPointer
(
const
ADataType
*
p_in_grid
,
const
void
*
p_wei_grid
,
const
BDataType
*
p_wei_grid
,
void
*
p_out_grid
,
EDataType
*
p_out_grid
,
index_t
N
,
index_t
N
,
index_t
K
,
index_t
K
,
index_t
C
,
index_t
C
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment