Commit 9d6938ff authored by Jing Zhang
Browse files

fixed buffer_load

parent 9f633f91
...@@ -95,7 +95,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5 ...@@ -95,7 +95,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
*reinterpret_cast<SrcData*>(&p_dst[dst_offset]) = src_data; *reinterpret_cast<SrcData*>(&p_dst[dst_offset]) = src_data;
} }
#if 1 #if 0
template <typename SrcData, index_t SrcDataPerAccess> template <typename SrcData, index_t SrcDataPerAccess>
struct vector_data_load; struct vector_data_load;
...@@ -129,17 +129,12 @@ struct ThreadwiseGenericTensorSliceCopy_v5 ...@@ -129,17 +129,12 @@ struct ThreadwiseGenericTensorSliceCopy_v5
} }
}; };
#else #else
template <typename SrcData, index_t SrcDataPerAccess> template <index_t SrcDataPerAccess, index_t SrcDataRange, typename SrcData, typename SrcCoord>
struct vector_data_load __device__ static auto vector_data_load(const SrcData* p_src, const SrcCoord src_coord_begin)
{ {
template <typename SrcCoord> auto src_offset = src_coord_begin.GetOffset();
__device__ static auto run(const float* p_src, const SrcCoord src_coord_begin) return amd_buffer_load<SrcData, SrcDataPerAccess>(p_src, src_offset, true, SrcDataRange);
{ }
auto src_offset = src_coord_begin.GetOffset();
return amd_buffer_load<SrcData, SrcDataPerAccess>(
p_src, src_offset, true, SrcDataPerAccess);
}
};
#endif #endif
...@@ -202,7 +197,8 @@ struct ThreadwiseGenericTensorSliceCopy_v5 ...@@ -202,7 +197,8 @@ struct ThreadwiseGenericTensorSliceCopy_v5
// load data from src to the long-vector buffer // load data from src to the long-vector buffer
const auto src_coord = mSrcSliceOrigin + to_multi_index(long_vector_data_begin_id); const auto src_coord = mSrcSliceOrigin + to_multi_index(long_vector_data_begin_id);
auto src_buff = vector_data_load<SrcData, SrcDataPerRead>::run(p_src, src_coord); auto src_buff =
vector_data_load<SrcDataPerRead, SrcDesc::GetElementSpace()>(p_src, src_coord);
// store data from the long-vector buffer to dst // store data from the long-vector buffer to dst
constexpr auto buff_off = constexpr auto buff_off =
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment