Commit fb04c9be authored by Jing Zhang
Browse files

fixed copy

parent e871c55b
...@@ -351,6 +351,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -351,6 +351,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
// output: register to global memory // output: register to global memory
{ {
constexpr auto c_k_n_ho_wo_global_tensor_iterator_hacks = CGlobalIteratorHacks{};
static_assert(CThreadTransferDstScalarPerVector == 16 && KPerBlock == 16, ""); static_assert(CThreadTransferDstScalarPerVector == 16 && KPerBlock == 16, "");
const index_t k_block_data_on_global_vec = const index_t k_block_data_on_global_vec =
...@@ -372,10 +373,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -372,10 +373,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
vector_type<int8_t, vec_len> d_vec; vector_type<int8_t, vec_len> d_vec;
// FloatC d_vec[c_k_n_ho_wo_thread_desc_vec.GetElementSpaceSize()];
constexpr auto c_k_n_ho_wo_global_tensor_iterator_hacks = CGlobalIteratorHacks{};
static_for<0, KPerThreadVec, 1>{}([&](auto k_i) { static_for<0, KPerThreadVec, 1>{}([&](auto k_i) {
static_for<0, HoPerThread, 1>{}([&](auto h_i) { static_for<0, HoPerThread, 1>{}([&](auto h_i) {
static_for<0, WoPerThread, 1>{}([&](auto w_i) { static_for<0, WoPerThread, 1>{}([&](auto w_i) {
...@@ -385,10 +382,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -385,10 +382,6 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
FloatC>()[Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset( FloatC>()[Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset(
make_tuple(k_i, 0, h_i, w_i))>{}]; make_tuple(k_i, 0, h_i, w_i))>{}];
// t.template AsType<FloatC>()(Number<0>{}) =
// d_vec[Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset(
// make_tuple(k_i, 0, h_i, w_i))>{}];
static_for<0, CThreadTransferDstScalarPerVector, 1>{}([&](auto i) { static_for<0, CThreadTransferDstScalarPerVector, 1>{}([&](auto i) {
t.template AsType<int8_t>()(i) = t.template AsType<int8_t>()(i) =
p_c_thread[c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple( p_c_thread[c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple(
...@@ -398,15 +391,11 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -398,15 +391,11 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
d_vec.template AsType<FloatC>()( d_vec.template AsType<FloatC>()(
Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple( Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple(
k_i, 0, h_i, w_i))>{}) = t.template AsType<FloatC>()[Number<0>{}]; k_i, 0, h_i, w_i))>{}) = t.template AsType<FloatC>()[Number<0>{}];
// d_vec[Number<c_k_n_ho_wo_thread_desc_vec.CalculateOffset(make_tuple(
// k_i, 0, h_i, w_i))>{}] = t.template AsType<FloatC>()[Number<0>{}];
}); });
}); });
}); });
ThreadwiseDynamicTensorSliceTransfer_v1r3< ThreadwiseDynamicTensorSliceTransfer_v1r3<
// FloatC,
decltype(d_vec), decltype(d_vec),
FloatC, FloatC,
decltype(c_k_n_ho_wo_thread_desc_vec), decltype(c_k_n_ho_wo_thread_desc_vec),
......
...@@ -927,7 +927,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2 ...@@ -927,7 +927,8 @@ struct ThreadwiseDynamicTensorSliceTransfer_v2
dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) + src_data_idx + dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) + src_data_idx +
i * src_scalar_step_in_vector); i * src_scalar_step_in_vector);
p_dst.template AsType<SrcData>()(i) = src_vector.template AsType<SrcData>()[i]; p_dst.template AsType<SrcData>()(Number<dst_offset>{}) =
src_vector.template AsType<SrcData>()[i];
}); });
constexpr auto move_on_dim = [&]() constexpr constexpr auto move_on_dim = [&]() constexpr
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment