"vscode:/vscode.git/clone" did not exist on "ea30986e0cc14b1872cb6a4b9192f18ad73208f7"
Commit a68b16a5 authored by Chao Liu's avatar Chao Liu
Browse files

refactor

parent 0a386c46
......@@ -646,9 +646,9 @@ int main(int argc, char* argv[])
device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw
#elif 0
device_convolution_implicit_gemm_v2_chwn_cyxk_khwn
#elif 1
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw
#elif 0
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw
#elif 1
device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw
#endif
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
......
......@@ -203,7 +203,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
make_ConstantTensorDescriptor_packed(thread_sub_tensor_lengths * repeat_lengths);
static_ford<decltype(repeat_lengths)>{}([&](auto repeat_multi_id_) {
#if 1
#if 0
constexpr auto repeat_multi_id = sequence2array(decltype(repeat_multi_id_){});
const auto src_thread_data_multi_id_begin = repeat_multi_id * data_per_cluster_per_dims;
......@@ -215,7 +215,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
const index_t clipboard_offset =
thread_tensor_desc.GetOffsetFromMultiIndex(clipboard_data_multi_id_begin);
#else
#else // HIP compiler performs better with these codes
constexpr auto repeat_multi_id = decltype(repeat_multi_id_){};
constexpr auto src_thread_data_multi_id_begin =
......@@ -256,7 +256,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
make_ConstantTensorDescriptor_packed(thread_sub_tensor_lengths * repeat_lengths);
static_ford<decltype(repeat_lengths)>{}([&](auto repeat_multi_id_) {
#if 1
#if 0
constexpr auto repeat_multi_id = sequence2array(decltype(repeat_multi_id_){});
const auto clipboard_data_multi_id_begin = repeat_multi_id * thread_sub_tensor_lengths;
......@@ -267,7 +267,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
thread_tensor_desc.GetOffsetFromMultiIndex(clipboard_data_multi_id_begin);
const index_t dst_offset = DstDesc{}.GetOffsetFromMultiIndex(dst_data_multi_id_begin);
#else
#else // HIP compiler performs better with these codes
constexpr auto repeat_multi_id = decltype(repeat_multi_id_){};
constexpr auto clipboard_data_multi_id_begin =
......@@ -338,27 +338,6 @@ struct BlockwiseGenericTensorSliceCopy_v1
src_partial_original_desc.UpdateMultiIndexGivenStepSizeOf1dIndex(
old_src_partial_original_multi_id, StepSize, direction);
#if 0
{
if(debug_flag && get_block_1d_id() == 0)
{
printf("id %5u %5u: "
"old_src_partial_original_multi_id %u %u %u, "
"new_src_partial_original_multi_id %u %u %u, "
"mThreadSrcOffset %u, mThreadDstOffset %u \n",
get_block_1d_id(),
get_thread_local_1d_id(),
old_src_partial_original_multi_id[0],
old_src_partial_original_multi_id[1],
old_src_partial_original_multi_id[2],
new_src_partial_original_multi_id[0],
new_src_partial_original_multi_id[1],
new_src_partial_original_multi_id[2]
);
}
}
#endif
// update "mThreadSrcOriginalMultiId"
static_for<0, decltype(src_partial_original_dims)::GetSize(), 1>{}([&](auto I_) {
constexpr auto I = decltype(I_){};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment