Commit 36de63ff authored by Chao Liu
Browse files

fix bug

parent 888f1d68
@@ -1469,9 +1469,20 @@ struct ThreadwiseDynamicTensorSliceTransfer_v4
const bool is_src_valid = coordinate_has_valid_offset_assuming_visible_index_is_valid( const bool is_src_valid = coordinate_has_valid_offset_assuming_visible_index_is_valid(
src_desc, src_data_coord); src_desc, src_data_coord);
#if 0
// TODO: this is slooooooooow!
src_tmp_buf.template AsType<src_vector_t>()(Number<0>{}) = src_tmp_buf.template AsType<src_vector_t>()(Number<0>{}) =
is_src_valid ? src_buf.template AsType<src_vector_t>()[src_data_coord.GetOffset()] is_src_valid ? src_buf.template AsType<src_vector_t>()[src_data_coord.GetOffset() /
SrcScalarPerVector]
: src_vector_t{0}; : src_vector_t{0};
#else
// this has normal performance but it's hacky
src_tmp_buf.template AsType<src_vector_t>()(Number<0>{}) =
is_src_valid
? *reinterpret_cast<const src_vector_t*>(&(reinterpret_cast<const SrcData*>(
src_buf.p_scalar_)[src_data_coord.GetOffset()]))
: src_vector_t{0};
#endif
// copy data from src_tmp_buf to dst_tmp_buf (data cast data from SrcData to DstData) // copy data from src_tmp_buf to dst_tmp_buf (data cast data from SrcData to DstData)
auto dst_tmp_buf = make_static_buffer<DstData>(Number<SrcScalarPerVector>{}); auto dst_tmp_buf = make_static_buffer<DstData>(Number<SrcScalarPerVector>{});
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment