Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
9d6938ff
Commit
9d6938ff
authored
Dec 17, 2020
by
Jing Zhang
Browse files
fixed buffer_load
parent
9f633f91
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
12 deletions
+8
-12
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
...sor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
+8
-12
No files found.
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
View file @
9d6938ff
...
...
@@ -95,7 +95,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
*
reinterpret_cast
<
SrcData
*>
(
&
p_dst
[
dst_offset
])
=
src_data
;
}
#if
1
#if
0
template <typename SrcData, index_t SrcDataPerAccess>
struct vector_data_load;
...
...
@@ -129,17 +129,12 @@ struct ThreadwiseGenericTensorSliceCopy_v5
}
};
#else
template
<
typename
SrcData
,
index_t
SrcDataPerAccess
>
struct
vector_data_load
template
<
index_t
SrcDataPerAccess
,
index_t
SrcDataRange
,
typename
SrcData
,
typename
SrcCoord
>
__device__
static
auto
vector_data_load
(
const
SrcData
*
p_src
,
const
SrcCoord
src_coord_begin
)
{
template
<
typename
SrcCoord
>
__device__
static
auto
run
(
const
float
*
p_src
,
const
SrcCoord
src_coord_begin
)
{
auto
src_offset
=
src_coord_begin
.
GetOffset
();
return
amd_buffer_load
<
SrcData
,
SrcDataPerAccess
>
(
p_src
,
src_offset
,
true
,
SrcDataPerAccess
);
}
};
auto
src_offset
=
src_coord_begin
.
GetOffset
();
return
amd_buffer_load
<
SrcData
,
SrcDataPerAccess
>
(
p_src
,
src_offset
,
true
,
SrcDataRange
);
}
#endif
...
...
@@ -202,7 +197,8 @@ struct ThreadwiseGenericTensorSliceCopy_v5
// load data from src to the long-vector buffer
const
auto
src_coord
=
mSrcSliceOrigin
+
to_multi_index
(
long_vector_data_begin_id
);
auto
src_buff
=
vector_data_load
<
SrcData
,
SrcDataPerRead
>::
run
(
p_src
,
src_coord
);
auto
src_buff
=
vector_data_load
<
SrcDataPerRead
,
SrcDesc
::
GetElementSpace
()
>
(
p_src
,
src_coord
);
// store data from the long-vector buffer to dst
constexpr
auto
buff_off
=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment