Commit ff4b1b1d authored by Jing Zhang
Browse files

restore

parent 73a665f0
...@@ -174,7 +174,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -174,7 +174,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
const index_t wo_thread_data_on_global = const index_t wo_thread_data_on_global =
wo_block_data_on_global + wo_thread_id * WoPerThread; wo_block_data_on_global + wo_thread_id * WoPerThread;
#if 0 #if 1
// A matrix blockwise copy // A matrix blockwise copy
auto a_blockwise_copy = auto a_blockwise_copy =
BlockwiseDynamicTensorSliceTransfer_v4<BlockSize, BlockwiseDynamicTensorSliceTransfer_v4<BlockSize,
...@@ -354,7 +354,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -354,7 +354,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
} }
#endif #endif
#if 0 #if 1
// output: register to global memory // output: register to global memory
{ {
constexpr auto HoPerThreadx2 = HoPerThread * 2; constexpr auto HoPerThreadx2 = HoPerThread * 2;
...@@ -418,10 +418,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -418,10 +418,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
c_k_n_ho_wo_global_tensor_iterator_hacks); c_k_n_ho_wo_global_tensor_iterator_hacks);
static_for<0, vector_len, 1>{}([&](auto i) { static_for<0, vector_len, 1>{}([&](auto i) {
// d_vec.Scalars()(i) += d_vec.Scalars()(i) +=
// p_c_thread[c_k_n_ho_wo_thread_desc.CalculateOffset( p_c_thread[c_k_n_ho_wo_thread_desc.CalculateOffset(
// make_tuple(k_i * vector_len + i, 0, h_i / 2, w_i / 2))]; make_tuple(k_i * vector_len + i, 0, h_i / 2, w_i / 2))];
d_vec.Vector() += 1;
}); });
ThreadwiseDynamicTensorSliceTransfer_v1r3< ThreadwiseDynamicTensorSliceTransfer_v1r3<
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment