Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0b997ce4
"include/ck/utility/array.hpp" did not exist on "9d59a39af303d9ef9a08bdf7d492ddfca33304ea"
Commit
0b997ce4
authored
Jul 18, 2022
by
Chao Liu
Browse files
adding conv multiple D
parent
69d323de
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
373 additions
and
257 deletions
+373
-257
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
+38
-35
include/ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
...device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
+293
-211
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp
...ration/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp
+4
-4
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+38
-7
No files found.
example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp
View file @
0b997ce4
...
...
@@ -16,7 +16,7 @@ using S = ck::Sequence<Is...>;
using
InElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
WeiElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
OutElementOp
=
ck
::
tensor_operation
::
element_wise
::
UnaryConvert
;
static
constexpr
auto
ConvFwdDefault
=
ck
::
tensor_operation
::
device
::
ConvolutionForwardSpecialization
::
Default
;
...
...
@@ -48,18 +48,18 @@ using DeviceConvNDFwdInstance = ck::tensor_operation::device::DeviceConvNdFwdNwc
2, // ABlockTransferSrcVectorDim
8, // ABlockTransferSrcScalarPerVector
8, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLds
Add
ExtraM
true, // ABlockLdsExtraM
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
8, // BBlockTransferDstScalarPerVector_K1
true, // BBlockLds
Add
ExtraN
true, // BBlockLdsExtraN
7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector
#else
using
CShuffleDataType
=
floa
t
;
using
CShuffleDataType
=
ck
::
half_
t
;
template
<
ck
::
index_t
NDimSpatial
>
using
DeviceConvNDFwdInstance
=
...
...
@@ -69,16 +69,17 @@ using DeviceConvNDFwdInstance =
WeiDataType
,
//
AccDataType
,
//
CShuffleDataType
,
//
ck
::
Tuple
<>
,
ck
::
Tuple
<>
,
//
OutDataType
,
//
InElementOp
,
// Input Elementwise Operation
WeiElementOp
,
// Weights Elementwise Operation
OutElementOp
,
// Output Elementwise Operation
ConvFwdDefault
,
// ConvForwardSpecialization
1
,
//
256
,
// BlockSize
128
,
// MPerBlock
256
,
// NPerBlock
4
,
// K
0
PerBlock
32
,
// KPerBlock
8
,
// K1
32
,
// MPerXdl
32
,
// NPerXdl
...
...
@@ -90,16 +91,18 @@ using DeviceConvNDFwdInstance =
2
,
// ABlockTransferSrcVectorDim
8
,
// ABlockTransferSrcScalarPerVector
8
,
// ABlockTransferDstScalarPerVector_K1
true
,
// ABlockLds
Add
ExtraM
1
,
// ABlockLdsExtraM
S
<
4
,
64
,
1
>
,
// BBlockTransferThreadClusterLengths_K0_N_K1
S
<
1
,
0
,
2
>
,
// BBlockTransferThreadClusterArrangeOrder
S
<
1
,
0
,
2
>
,
// BBlockTransferSrcAccessOrder
2
,
// BBlockTransferSrcVectorDim
8
,
// BBlockTransferSrcScalarPerVector
8
,
// BBlockTransferDstScalarPerVector_K1
true
,
// BBlockLdsAddExtraN
7
,
// CThreadTransferSrcDstVectorDim
1
>
;
// CThreadTransferDstScalarPerVector
1
,
// BBlockLdsExtraN
1
,
1
,
S
<
1
,
32
,
1
,
8
>
,
8
>
;
#endif
int
main
(
int
argc
,
char
*
argv
[])
...
...
include/ck/tensor_operation/gpu/device/device_convnd_fwd_multiple_d_nwc_kxc_nwk_xdl_cshuffle.hpp
View file @
0b997ce4
This diff is collapsed.
Click to expand it.
include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp
View file @
0b997ce4
...
...
@@ -618,18 +618,18 @@ struct DeviceGemmMultipleD_Xdl_CShuffle : public DeviceGemmMultipleD<ALayout,
arg
.
block_2_etile_map_
);
};
float
av
e
_time
=
0
;
float
av
g
_time
=
0
;
if
(
GridwiseGemm
::
CalculateHasMainKBlockLoop
(
K
))
{
av
e
_time
=
launch_kernel
(
integral_constant
<
bool
,
true
>
{});
av
g
_time
=
launch_kernel
(
integral_constant
<
bool
,
true
>
{});
}
else
{
av
e
_time
=
launch_kernel
(
integral_constant
<
bool
,
false
>
{});
av
g
_time
=
launch_kernel
(
integral_constant
<
bool
,
false
>
{});
}
return
av
e
_time
;
return
av
g
_time
;
}
// polymorphic
...
...
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
0b997ce4
...
...
@@ -12,16 +12,47 @@ namespace element_wise {
struct
PassThrough
{
template
<
typename
T
>
__host__
__device__
void
operator
()(
T
&
y
,
const
T
&
x
)
const
template
<
typename
Y
,
typename
X
>
__host__
__device__
void
operator
()(
Y
&
y
,
const
X
&
x
)
const
;
template
<
>
__host__
__device__
void
operator
()
<
double
,
double
>
(
double
&
y
,
const
double
&
x
)
const
{
static_assert
(
is_same
<
T
,
float
>::
value
||
is_same
<
T
,
double
>::
value
||
is_same
<
T
,
half_t
>::
value
||
is_same
<
T
,
bhalf_t
>::
value
||
is_same
<
T
,
int32_t
>::
value
||
is_same
<
T
,
int8_t
>::
value
,
"Data type is not supported by this operation!"
);
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
float
,
float
>
(
float
&
y
,
const
float
&
x
)
const
{
y
=
x
;
};
}
template
<
>
__host__
__device__
void
operator
()
<
half_t
,
half_t
>
(
half_t
&
y
,
const
half_t
&
x
)
const
{
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
bhalf_t
,
bhalf_t
>
(
bhalf_t
&
y
,
const
bhalf_t
&
x
)
const
{
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
int8_t
,
int8_t
>
(
int8_t
&
y
,
const
int8_t
&
x
)
const
{
y
=
x
;
}
};
/// Element-wise functor that writes a converted copy of its input.
///
/// The call operator is templated on both the destination type `Y` and the
/// source type `X`, so the two may differ.
struct UnaryConvert
{
    /// Assign to `y` the result of `type_convert<Y>(x)`.
    ///
    /// @tparam Y  destination element type
    /// @tparam X  source element type
    /// @param  y  output element, overwritten
    /// @param  x  input element, left unmodified
    ///
    /// NOTE(review): the conversion semantics (rounding, saturation) are
    /// whatever `type_convert` implements; it is defined outside this
    /// block -- confirm against its definition.
    template <typename Y, typename X>
    __host__ __device__ void operator()(Y& y, const X& x) const
    {
        y = type_convert<Y>(x);
    }
};
struct
Scale
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment