Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
e17b495d
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "beb932c5d111872c5e45387e7b1b2b3dd0524a47"
Commit
e17b495d
authored
May 24, 2019
by
Chao Liu
Browse files
refactor
parent
8fcf3f1e
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
12 additions
and
12 deletions
+12
-12
src/include/blockwise_generic_tensor_slice_op.hip.hpp
src/include/blockwise_generic_tensor_slice_op.hip.hpp
+5
-5
src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp
...plicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp
+1
-1
src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp
...ise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp
+1
-1
src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp
...dwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp
+4
-4
src/include/threadwise_tensor_slice_op.hip.hpp
src/include/threadwise_tensor_slice_op.hip.hpp
+1
-1
No files found.
src/include/blockwise_
merged
_tensor_slice_op.hip.hpp
→
src/include/blockwise_
generic
_tensor_slice_op.hip.hpp
View file @
e17b495d
...
@@ -14,7 +14,7 @@ template <index_t BlockSize,
...
@@ -14,7 +14,7 @@ template <index_t BlockSize,
class
DstAccessOrder
,
class
DstAccessOrder
,
index_t
SrcDataPerRead
,
index_t
SrcDataPerRead
,
index_t
DstDataPerRead
>
index_t
DstDataPerRead
>
struct
BlockwiseTensorSliceCopy_
generic_
v1
struct
Blockwise
Generic
TensorSliceCopy_v1
{
{
static
constexpr
index_t
nDim
=
SrcDesc
::
GetNumOfDimension
();
static
constexpr
index_t
nDim
=
SrcDesc
::
GetNumOfDimension
();
...
@@ -22,7 +22,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
...
@@ -22,7 +22,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
index_t
mDstMyThreadOffset
;
index_t
mDstMyThreadOffset
;
__device__
__device__
BlockwiseTensorSliceCopy_
generic_
v1
(
Array
<
index_t
,
nDim
>
src_block_data_multi_id_begin
,
Blockwise
Generic
TensorSliceCopy_v1
(
Array
<
index_t
,
nDim
>
src_block_data_multi_id_begin
,
Array
<
index_t
,
nDim
>
dst_block_data_multi_id_begin
)
Array
<
index_t
,
nDim
>
dst_block_data_multi_id_begin
)
{
{
// check NDim consistent
// check NDim consistent
...
@@ -155,7 +155,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
...
@@ -155,7 +155,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
const
index_t
clipboard_offset
=
thread_tensor_desc
.
GetOffsetFromMultiIndex
(
const
index_t
clipboard_offset
=
thread_tensor_desc
.
GetOffsetFromMultiIndex
(
clipboard_data_multi_id_begin
);
// cannot not constexpr, why?
clipboard_data_multi_id_begin
);
// cannot not constexpr, why?
threadwise_tensor_slice_copy
_generic
(
SrcDesc
{},
threadwise_
generic_
tensor_slice_copy
(
SrcDesc
{},
p_src
+
src_offset
+
mSrcMyThreadOffset
,
p_src
+
src_offset
+
mSrcMyThreadOffset
,
make_zero_array
<
index_t
,
nDim
>
(),
make_zero_array
<
index_t
,
nDim
>
(),
thread_tensor_desc
,
thread_tensor_desc
,
...
@@ -193,7 +193,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
...
@@ -193,7 +193,7 @@ struct BlockwiseTensorSliceCopy_generic_v1
const
index_t
dst_offset
=
DstDesc
{}.
GetOffsetFromMultiIndex
(
const
index_t
dst_offset
=
DstDesc
{}.
GetOffsetFromMultiIndex
(
dst_data_multi_id_begin
);
// cannot not constexpr, why?
dst_data_multi_id_begin
);
// cannot not constexpr, why?
threadwise_tensor_slice_copy
_generic
(
thread_tensor_desc
,
threadwise_
generic_
tensor_slice_copy
(
thread_tensor_desc
,
p_clipboard
+
clipboard_offset
,
p_clipboard
+
clipboard_offset
,
make_zero_array
<
index_t
,
nDim
>
(),
make_zero_array
<
index_t
,
nDim
>
(),
DstDesc
{},
DstDesc
{},
...
...
src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp
View file @
e17b495d
...
@@ -474,7 +474,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw
...
@@ -474,7 +474,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw
map_out_global2thread,
map_out_global2thread,
Number<OutThreadCopyDataPerWrite_W>{});
Number<OutThreadCopyDataPerWrite_W>{});
#else
#else
threadwise_tensor_slice_copy
_generic
(
threadwise_
generic_
tensor_slice_copy
(
out_10d_thread_desc
.
ReorderGivenNew2Old
(
map_out_global2thread
),
out_10d_thread_desc
.
ReorderGivenNew2Old
(
map_out_global2thread
),
p_out_thread
,
p_out_thread
,
make_zero_array
<
index_t
,
10
>
(),
make_zero_array
<
index_t
,
10
>
(),
...
...
src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp
View file @
e17b495d
...
@@ -423,7 +423,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_nkhw
...
@@ -423,7 +423,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_nkhw
map_out_global2thread,
map_out_global2thread,
Number<OutThreadCopyDataPerWrite_W>{});
Number<OutThreadCopyDataPerWrite_W>{});
#else
#else
threadwise_tensor_slice_copy
_generic
(
threadwise_
generic_
tensor_slice_copy
(
out_10d_thread_desc
.
ReorderGivenNew2Old
(
map_out_global2thread
),
out_10d_thread_desc
.
ReorderGivenNew2Old
(
map_out_global2thread
),
p_out_thread
,
p_out_thread
,
make_zero_array
<
index_t
,
10
>
(),
make_zero_array
<
index_t
,
10
>
(),
...
...
src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp
View file @
e17b495d
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMergedTensorDescriptor.hip.hpp"
#include "ConstantMergedTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_
merged
_tensor_slice_op.hip.hpp"
#include "blockwise_
generic
_tensor_slice_op.hip.hpp"
#include "blockwise_gemm.hip.hpp"
#include "blockwise_gemm.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
...
@@ -123,7 +123,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
...
@@ -123,7 +123,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
// input blockwise copy
// input blockwise copy
// slice a merged tensor, reorder and copy to a normal tensor
// slice a merged tensor, reorder and copy to a normal tensor
// this copy operator already has blockwise offset built-in
// this copy operator already has blockwise offset built-in
const
auto
blockwise_in_copy
=
BlockwiseTensorSliceCopy_
generic_
v1
<
const
auto
blockwise_in_copy
=
Blockwise
Generic
TensorSliceCopy_v1
<
BlockSize
,
BlockSize
,
Float
,
Float
,
decltype
(
in_c_n1_b_n2_global_merged_desc
),
decltype
(
in_c_n1_b_n2_global_merged_desc
),
...
@@ -152,7 +152,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
...
@@ -152,7 +152,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
// this copy operator already have blockwise offset built-in
// this copy operator already have blockwise offset built-in
const
auto
blockwise_wei_copy
=
const
auto
blockwise_wei_copy
=
#if 0
#if 0
BlockwiseTensorSliceCopy_
generic_
v1<BlockSize,
Blockwise
Generic
TensorSliceCopy_v1<BlockSize,
Float,
Float,
decltype(wei_c_k_global_desc),
decltype(wei_c_k_global_desc),
decltype(wei_c_k_block_desc),
decltype(wei_c_k_block_desc),
...
@@ -318,7 +318,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
...
@@ -318,7 +318,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw
out_k_n1_b_n2_global_merged_desc
.
GetOffsetFromMultiIndex
(
out_k_n1_b_n2_global_merged_desc
.
GetOffsetFromMultiIndex
(
k_thread_data_on_global
,
0
,
b_thread_data_on_global
,
0
);
k_thread_data_on_global
,
0
,
b_thread_data_on_global
,
0
);
threadwise_tensor_slice_copy
_generic
(
out_n0_n1_n2_k0_k1_k2_h_w_thread_desc
,
threadwise_
generic_
tensor_slice_copy
(
out_n0_n1_n2_k0_k1_k2_h_w_thread_desc
,
p_out_thread
,
p_out_thread
,
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
},
out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc
,
out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc
,
...
...
src/include/threadwise_tensor_slice_op.hip.hpp
View file @
e17b495d
...
@@ -194,7 +194,7 @@ threadwise_tensor_slice_copy_reorder_given_dst2src_v3(SrcDesc,
...
@@ -194,7 +194,7 @@ threadwise_tensor_slice_copy_reorder_given_dst2src_v3(SrcDesc,
}
}
template
<
class
Float
,
class
SrcDesc
,
class
DstDesc
,
class
SliceLengths
,
class
DimAccessOrder
>
template
<
class
Float
,
class
SrcDesc
,
class
DstDesc
,
class
SliceLengths
,
class
DimAccessOrder
>
__device__
void
threadwise_tensor_slice_copy
_generic
(
__device__
void
threadwise_
generic_
tensor_slice_copy
(
SrcDesc
,
SrcDesc
,
const
Float
*
__restrict__
p_src
,
const
Float
*
__restrict__
p_src
,
Array
<
index_t
,
SrcDesc
::
GetNumOfDimension
()
>
src_multi_id_begin
,
Array
<
index_t
,
SrcDesc
::
GetNumOfDimension
()
>
src_multi_id_begin
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment