Commit 9728f541 authored by Jing Zhang
Browse files

fixed copy

parent 25b71afc
......@@ -482,15 +482,13 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
CThreadTransferDstScalarPerVector;
static_assert(vec_len == 256, "");
// vector_type<int8_t, vec_len> d_vec;
FloatC d_vec[d_k_n_hox2_wox2_thread_desc.GetElementSpaceSize()];
vector_type<int8_t, vec_len> d_vec;
constexpr auto c_k_n_ho_wo_global_tensor_iterator_hacks = CGlobalIteratorHacks{};
ThreadwiseDynamicTensorSliceTransfer_v2<
FloatC,
// decltype(d_vec),
FloatC,
decltype(d_vec),
decltype(d_k_n_hox2_wox2_global_desc),
decltype(d_k_n_hox2_wox2_thread_desc),
Sequence<KPerThreadAdd, 1, HoPerThreadx2, WoPerThreadx2>,
......@@ -519,12 +517,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
static_for<0, WoPerThreadx2, 1>{}([&](auto w_i) {
vector_type<int8_t, CThreadTransferDstScalarPerVector> t;
// t.template AsType<FloatC>()(Number<0>{}) = d_vec.template AsType<
// FloatC>()[Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(
// make_tuple(k_i, 0, h_i, w_i))>{}];
t.template AsType<FloatC>()(Number<0>{}) =
d_vec[Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(
t.template AsType<FloatC>()(Number<0>{}) = d_vec.template AsType<
FloatC>()[Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(
make_tuple(k_i, 0, h_i, w_i))>{}];
static_for<0, CThreadTransferDstScalarPerVector, 1>{}([&](auto i) {
......@@ -536,19 +530,15 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
w_i / 2))];
});
// d_vec.template AsType<FloatC>()(
// Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(make_tuple(
// k_i, 0, h_i, w_i))>{}) = t.template AsType<FloatC>()[Number<0>{}];
d_vec[Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(make_tuple(
k_i, 0, h_i, w_i))>{}] = t.template AsType<FloatC>()[Number<0>{}];
d_vec.template AsType<FloatC>()(
Number<d_k_n_hox2_wox2_thread_desc.CalculateOffset(make_tuple(
k_i, 0, h_i, w_i))>{}) = t.template AsType<FloatC>()[Number<0>{}];
});
});
});
ThreadwiseDynamicTensorSliceTransfer_v1r3<
// decltype(d_vec),
FloatC,
decltype(d_vec),
FloatC,
decltype(d_k_n_hox2_wox2_thread_desc),
decltype(d_k_n_hox2_wox2_global_desc),
......
......@@ -377,7 +377,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1r3
src_desc.CalculateOffset(to_multi_index(src_slice_origin_idx) + dst_data_idx +
i * dst_scalar_step_in_vector);
dst_vector.template AsType<DstData>()(i) = p_src.template AsType<DstData>()[i];
dst_vector.template AsType<DstData>()(i) =
p_src.template AsType<DstData>()[Number<src_offset>{}];
});
const bool is_dst_valid = coordinate_has_valid_offset_assuming_visible_index_is_valid(
......@@ -926,7 +927,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) + src_data_idx +
i * src_scalar_step_in_vector);
p_dst.template AsType<SrcData>()(i) = src_vector.template AsType<SrcData>()[i];
p_dst.template AsType<SrcData>()(Number<dst_offset>{}) =
src_vector.template AsType<SrcData>()[i];
});
constexpr auto move_on_dim = [&]() constexpr
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment