Commit f5e61549 authored by Anthony Chang
Browse files

shrink blockwise gemm v2 thread buffer size

parent c62165da
@@ -920,13 +920,13 @@ struct BlockwiseGemmXdlops_v2
 }
 protected:
-// A[M0, M1, M2, KPerThread]
+// A[M0, M1, M2, KPack]
 static constexpr auto a_thread_desc_ =
-make_naive_tensor_descriptor_packed(make_tuple(I1, I1, I1, Number<KPerThread>{}));
+make_naive_tensor_descriptor_packed(make_tuple(I1, I1, I1, Number<KPack>{}));
-// B[N0, N1, N2, KPerThread]
+// B[N0, N1, N2, KPack]
 static constexpr auto b_thread_desc_ =
-make_naive_tensor_descriptor_packed(make_tuple(I1, I1, I1, Number<KPerThread>{}));
+make_naive_tensor_descriptor_packed(make_tuple(I1, I1, I1, Number<KPack>{}));
 // C[M, N, NumRegXdlops]
 static constexpr auto c_thread_desc_ = make_naive_tensor_descriptor_packed(
...
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment