Commit 631d9892 authored by Jing Zhang
Browse files

clean code

parent 961556eb
......@@ -122,10 +122,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
__device__ static auto buffer_vector_load(const SrcData* p_src, const SrcCoord src_coord_begin)
{
auto src_offset = src_coord_begin.GetOffset();
auto r = GetRegBuffer<SrcData, SrcDataPerAccess>();
r.GetVector(Number<SrcDataPerAccess>{})(Number<0>{}) =
amd_buffer_load<SrcData, SrcDataPerAccess>(p_src, src_offset, true, SrcDataRange);
return r;
return amd_buffer_load<SrcData, SrcDataPerAccess>(p_src, src_offset, true, SrcDataRange);
}
template <typename DstData, index_t DstDataPerAccess>
......@@ -187,8 +184,10 @@ struct ThreadwiseGenericTensorSliceCopy_v5
// load data from src to the long-vector buffer
const auto src_coord = mSrcSliceOrigin + to_multi_index(long_vector_data_begin_id);
auto src_buff = buffer_vector_load<SrcDataPerRead, SrcDesc::GetElementSpace()>(
p_src, src_coord);
auto src_buff = GetRegBuffer<SrcData, SrcDataPerRead>();
src_buff.GetVector(Number<SrcDataPerRead>{})(Number<0>{}) =
buffer_vector_load<SrcDataPerRead, SrcDesc::GetElementSpace()>(p_src,
src_coord);
static_for<0, SrcDataPerRead, 1>{}([&](auto i) {
constexpr auto vector_id = long_vector_data_begin_id.Modify(
......
......@@ -115,7 +115,7 @@ void gridwise_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw(
// B matrix Copy
constexpr index_t GemmBBlockCopyClusterLengths_GemmK = 4;
constexpr index_t GemmBBlockCopyClusterLengths_GemmN = 4;
constexpr index_t GemmBBlockCopyClusterLengths_GemmKPack = 4;
constexpr index_t GemmBBlockCopyClusterLengths_GemmKPack = 1;
constexpr index_t GemmBBlockCopyThreadSliceLengths_GemmK =
GemmKPerBlock / GemmBBlockCopyClusterLengths_GemmK;
......@@ -141,7 +141,7 @@ void gridwise_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw(
using GemmBBlockCopyDstAccessOrder = Sequence<0, 1, 2, 3>; // [GemmG, GemmK, GemmN, GemmKPack]
constexpr index_t GemmBBlockCopySrcDataPerRead_GemmN = 4;
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmKPack = 1;
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmKPack = 4;
// gridwise GEMM
constexpr auto wkgrp_schd_order = NBlock1MBlock0;
......
......@@ -24,11 +24,11 @@ int main(int argc, char* argv[])
using namespace ck;
// 1x1, 56x56
constexpr index_t N = 4;
constexpr index_t C = 32;
constexpr index_t HI = 2;
constexpr index_t WI = 2;
constexpr index_t K = 32;
constexpr index_t N = 64;
constexpr index_t C = 128;
constexpr index_t HI = 56;
constexpr index_t WI = 56;
constexpr index_t K = 128;
constexpr index_t Y = 1;
constexpr index_t X = 1;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment