Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b9663356
Commit
b9663356
authored
Aug 02, 2019
by
Chao Liu
Browse files
experimenting new merged tensor copy
parent
a9a392b4
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
116 additions
and
6 deletions
+116
-6
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
...ridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
+16
-1
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp
.../tensor_operation/blockwise_generic_tensor_slice_copy.hpp
+16
-0
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
...tensor_operation/threadwise_generic_tensor_slice_copy.hpp
+84
-5
No files found.
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
View file @
b9663356
...
@@ -295,9 +295,24 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw
...
@@ -295,9 +295,24 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw
// do work
// do work
for
(
index_t
e
=
0
;
e
<
E
;
e
+=
EPerBlock
)
for
(
index_t
e
=
0
;
e
<
E
;
e
+=
EPerBlock
)
{
{
// marching slicing window
#if 0 // debug
blockwise_in_copy.Run(p_in_global, p_in_block);
blockwise_in_copy.Run(p_in_global, p_in_block);
blockwise_wei_copy.Run(p_wei_global, p_wei_block);
blockwise_wei_copy.Run(p_wei_global, p_wei_block);
#else
using
InSrcMergedDimSubLengthsHack
=
Sequence
<
1
,
1
,
1
,
1
>
;
using
InDstMergedDimSubLengthsHack
=
Sequence
<
1
,
1
,
1
,
1
>
;
blockwise_in_copy
.
Run_hack
(
p_in_global
,
p_in_block
,
InSrcMergedDimSubLengthsHack
{},
InDstMergedDimSubLengthsHack
{});
using
WeiSrcMergedDimSubLengthsHack
=
Sequence
<
1
,
1
>
;
using
WeiDstMergedDimSubLengthsHack
=
Sequence
<
1
,
1
>
;
blockwise_wei_copy
.
Run_hack
(
p_wei_global
,
p_wei_block
,
WeiSrcMergedDimSubLengthsHack
{},
WeiDstMergedDimSubLengthsHack
{});
#endif
__syncthreads
();
__syncthreads
();
...
...
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp
View file @
b9663356
...
@@ -418,6 +418,22 @@ struct BlockwiseGenericTensorSliceCopy_v2
...
@@ -418,6 +418,22 @@ struct BlockwiseGenericTensorSliceCopy_v2
mThreadwiseCopy
.
Run
(
p_src
,
p_dst
);
mThreadwiseCopy
.
Run
(
p_src
,
p_dst
);
}
}
// Experimental counterpart of Run(): hands the copy over to the threadwise
// copier's Run_hack.
//
// p_src / p_dst are passed through untouched. The two trailing parameters are
// unnamed tag arguments: only their types matter, and those types are forwarded
// to mThreadwiseCopy.Run_hack, which uses them to treat merged dimensions
// separately from normal dimensions when computing offsets.
//
// NOTE(review): per the original author's note, each tag is expected to hold the
// slice length on the merged dimensions of the corresponding tensor and 1 on the
// normal dimensions -- confirm against the threadwise implementation.
template <typename SrcMergedDimSubLengthsHack, typename DstMergedDimSubLengthsHack>
__device__ void Run_hack(const TData* p_src,
                         TData* p_dst,
                         SrcMergedDimSubLengthsHack,
                         DstMergedDimSubLengthsHack) const
{
    // The tag arguments have no names, so fresh instances are constructed for
    // the forwarded call.
    mThreadwiseCopy.Run_hack(
        p_src, p_dst, SrcMergedDimSubLengthsHack{}, DstMergedDimSubLengthsHack{});
}
__device__
void
MoveSrcSlicingWindow
(
Array
<
index_t
,
nDim
>
step_sizes
,
bool
positive_direction
)
__device__
void
MoveSrcSlicingWindow
(
Array
<
index_t
,
nDim
>
step_sizes
,
bool
positive_direction
)
{
{
mThreadwiseCopy
.
MoveSrcSlicingWindow
(
step_sizes
,
positive_direction
);
mThreadwiseCopy
.
MoveSrcSlicingWindow
(
step_sizes
,
positive_direction
);
...
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
View file @
b9663356
...
@@ -140,25 +140,104 @@ struct ThreadwiseGenericTensorSliceCopy_v2
...
@@ -140,25 +140,104 @@ struct ThreadwiseGenericTensorSliceCopy_v2
__device__
void
Run
(
const
TData
*
p_src
,
TData
*
p_dst
)
const
__device__
void
Run
(
const
TData
*
p_src
,
TData
*
p_dst
)
const
{
{
constexpr
auto
buffer_desc
=
make_ConstantTensorDescriptor_packed
(
SliceLengths
{});
TData
p_buffer_
[
buffer_desc
.
GetElementSpace
()];
TData
*
p_buffer
=
p_buffer_
;
#if 0
#if 0
static_ford<SliceLengths>{}([&](auto data_id) {
static_ford<SliceLengths>{}([&](auto data_id) {
p_
dst[(mDstSliceOrigin + data_id).GetOffset(
)] =
p_
buffer[buffer_desc.GetOffsetFromMultiIndex(data_id
)] =
p_src[(mSrcSliceOrigin + data_id).GetOffset()];
p_src[(mSrcSliceOrigin + data_id).GetOffset()];
});
});
static_ford<SliceLengths>{}([&](auto data_id) {
p_dst[(mDstSliceOrigin + data_id).GetOffset()] =
p_buffer[buffer_desc.GetOffsetFromMultiIndex(data_id)];
});
#elif
1
#elif
1
auto
src_slice_origin
=
mSrcSliceOrigin
;
auto
src_slice_origin
=
mSrcSliceOrigin
;
auto
dst_slice_origin
=
mDstSliceOrigin
;
auto
dst_slice_origin
=
mDstSliceOrigin
;
p_src
+=
src_slice_origin
.
RepositionOrigin
();
const
TData
*
p_src_tmp
=
p_src
+
src_slice_origin
.
RepositionOrigin
();
p_dst
+=
dst_slice_origin
.
RepositionOrigin
();
TData
*
p_dst_tmp
=
p_dst
+
dst_slice_origin
.
RepositionOrigin
();
static_ford
<
SliceLengths
>
{}([
&
](
auto
data_id
)
{
p_buffer
[
buffer_desc
.
GetOffsetFromMultiIndex
(
data_id
)]
=
p_src_tmp
[(
src_slice_origin
+
data_id
).
GetOffset
()];
});
static_ford
<
SliceLengths
>
{}([
&
](
auto
data_id
)
{
static_ford
<
SliceLengths
>
{}([
&
](
auto
data_id
)
{
p_dst
[(
dst_slice_origin
+
data_id
).
GetOffset
()]
=
p_dst
_tmp
[(
dst_slice_origin
+
data_id
).
GetOffset
()]
=
p_
src
[(
src_slice_origin
+
data_id
).
GetOffset
(
)];
p_
buffer
[
buffer_desc
.
GetOffsetFromMultiIndex
(
data_id
)];
});
});
#endif
#endif
}
}
// Experimental copy of one slice from p_src to p_dst through a function-local
// staging array, with the offset computation split into a "merged dimension"
// part and a "normal dimension" part.
//
// Template tags (passed as unnamed arguments; only their types are used):
//   SrcMergedDimSliceLengthsHack - per the original author's note, equal to
//       SliceLengths on the source's merged dimensions and 1 on its normal
//       dimensions.
//   DstMergedDimSliceLengthsHack - the same, for the destination.
//
// The complementary "normal" lengths are derived below as
// (SliceLengths + 1) - merged, which yields 1 on merged dimensions and the slice
// length on normal dimensions when the tags follow the convention above.
template <typename SrcMergedDimSliceLengthsHack, typename DstMergedDimSliceLengthsHack>
__device__ void Run_hack(const TData* p_src,
                         TData* p_dst,
                         SrcMergedDimSliceLengthsHack,
                         DstMergedDimSliceLengthsHack) const
{
    // Lengths iterated by the inner loops (the non-merged part of the slice).
    using SrcNormalDimSliceLengthsHack =
        decltype((SliceLengths{} + Number<1>{}) - SrcMergedDimSliceLengthsHack{});
    using DstNormalDimSliceLengthsHack =
        decltype((SliceLengths{} + Number<1>{}) - DstMergedDimSliceLengthsHack{});

    // Packed descriptor used to index the staging array by multi-index.
    constexpr auto buffer_desc = make_ConstantTensorDescriptor_packed(SliceLengths{});

    TData staging_storage[buffer_desc.GetElementSpace()];
    TData* staging = staging_storage;

    // Pass 1: source -> staging.
    static_ford<SrcMergedDimSliceLengthsHack>{}([&](auto merged_idx_) {
        constexpr auto merged_idx = decltype(merged_idx_){};

        // Offset contributed by the slice origin plus the merged-dimension index;
        // computed once per outer iteration.
        const TData* p_src_base = p_src + (mSrcSliceOrigin + merged_idx).GetOffset();

        static_ford<SrcNormalDimSliceLengthsHack>{}([&](auto normal_idx_) {
            constexpr auto normal_idx = decltype(normal_idx_){};

            constexpr index_t staging_offset =
                buffer_desc.GetOffsetFromMultiIndex(merged_idx + normal_idx);

            // Offset contributed by the normal-dimension index alone.
            constexpr index_t src_offset = SrcDesc::GetOffsetFromMultiIndex(normal_idx);

            staging[staging_offset] = p_src_base[src_offset];
        });
    });

    // Pass 2: staging -> destination, mirroring pass 1 with the dst tags.
    static_ford<DstMergedDimSliceLengthsHack>{}([&](auto merged_idx_) {
        constexpr auto merged_idx = decltype(merged_idx_){};

        TData* p_dst_base = p_dst + (mDstSliceOrigin + merged_idx).GetOffset();

        static_ford<DstNormalDimSliceLengthsHack>{}([&](auto normal_idx_) {
            constexpr auto normal_idx = decltype(normal_idx_){};

            constexpr index_t staging_offset =
                buffer_desc.GetOffsetFromMultiIndex(merged_idx + normal_idx);

            constexpr index_t dst_offset = DstDesc::GetOffsetFromMultiIndex(normal_idx);

            p_dst_base[dst_offset] = staging[staging_offset];
        });
    });
}
__device__
void
MoveSrcSlicingWindow
(
Array
<
index_t
,
nDim
>
step_sizes
,
bool
positive_direction
)
__device__
void
MoveSrcSlicingWindow
(
Array
<
index_t
,
nDim
>
step_sizes
,
bool
positive_direction
)
{
{
if
(
positive_direction
)
if
(
positive_direction
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment