Commit 4d8bbeae authored by Jianfeng yan
Browse files

clang-format

parent b0ebfb40
...@@ -563,28 +563,29 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3 ...@@ -563,28 +563,29 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
n_thread_data_on_grid_to_n0_n1_n2_adaptor.CalculateBottomIndex( n_thread_data_on_grid_to_n0_n1_n2_adaptor.CalculateBottomIndex(
make_multi_index(n_thread_data_on_grid)); make_multi_index(n_thread_data_on_grid));
auto c_thread_copy = ThreadwiseTensorSliceTransfer_v1r3< auto c_thread_copy =
FloatAcc, ThreadwiseTensorSliceTransfer_v1r3<FloatAcc,
FloatC, FloatC,
decltype(c_thread_desc_m0_n0_m1_n1_m2_m3_m4_n2), decltype(c_thread_desc_m0_n0_m1_n1_m2_m3_m4_n2),
decltype(c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2), decltype(c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2),
CElementwiseOperation, CElementwiseOperation,
Sequence<M0, N0, I1, I1, M2, I1, M4, I1>, Sequence<M0, N0, I1, I1, M2, I1, M4, I1>,
CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstAccessOrder,
CThreadTransferSrcDstVectorDim, CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector, CThreadTransferDstScalarPerVector,
CGlobalMemoryDataOperation, CGlobalMemoryDataOperation,
1, 1,
true>{c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2, true>{
make_multi_index(m_thread_data_on_grid_idx[I0], c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2,
n_thread_data_on_grid_idx[I0], make_multi_index(m_thread_data_on_grid_idx[I0],
m_thread_data_on_grid_idx[I1], n_thread_data_on_grid_idx[I0],
n_thread_data_on_grid_idx[I1], m_thread_data_on_grid_idx[I1],
m_thread_data_on_grid_idx[I2], n_thread_data_on_grid_idx[I1],
m_thread_data_on_grid_idx[I3], m_thread_data_on_grid_idx[I2],
m_thread_data_on_grid_idx[I4], m_thread_data_on_grid_idx[I3],
n_thread_data_on_grid_idx[I2]), m_thread_data_on_grid_idx[I4],
c_element_op}; n_thread_data_on_grid_idx[I2]),
c_element_op};
c_thread_copy.Run(c_thread_desc_m0_n0_m1_n1_m2_m3_m4_n2, c_thread_copy.Run(c_thread_desc_m0_n0_m1_n1_m2_m3_m4_n2,
make_tuple(I0, I0, I0, I0, I0, I0, I0, I0), make_tuple(I0, I0, I0, I0, I0, I0, I0, I0),
......
...@@ -248,9 +248,7 @@ struct ThreadwiseTensorSliceTransfer_v2 ...@@ -248,9 +248,7 @@ struct ThreadwiseTensorSliceTransfer_v2
src_coord_ = make_tensor_coordinate(src_desc, src_slice_origin_idx); src_coord_ = make_tensor_coordinate(src_desc, src_slice_origin_idx);
} }
template <typename SrcBuffer, template <typename SrcBuffer, typename DstBuffer, typename DstSliceOriginIdx>
typename DstBuffer,
typename DstSliceOriginIdx>
__device__ void Run(const SrcDesc& src_desc, __device__ void Run(const SrcDesc& src_desc,
const SrcBuffer& src_buf, const SrcBuffer& src_buf,
const DstDesc&, const DstDesc&,
...@@ -315,7 +313,6 @@ struct ThreadwiseTensorSliceTransfer_v2 ...@@ -315,7 +313,6 @@ struct ThreadwiseTensorSliceTransfer_v2
move_tensor_coordinate( move_tensor_coordinate(
src_desc, src_coord_, make_tensor_coordinate_step(dst_desc, forward_step)); src_desc, src_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
} }
}); });
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment