Unverified commit caad9f1e, authored by Nikhil Gupta, committed by GitHub
Browse files

[Fix] [CPU Backend] : Prepack weights for w8a8 oneDNN matmul (#33901)


Signed-off-by: nikhil-arm <nikhil.gupta2@arm.com>
parent 1d5922fa
...@@ -237,12 +237,20 @@ W8A8MatMulPrimitiveHandler::W8A8MatMulPrimitiveHandler(const Args& args) ...@@ -237,12 +237,20 @@ W8A8MatMulPrimitiveHandler::W8A8MatMulPrimitiveHandler(const Args& args)
}; };
dnnl::memory::desc original_b_md({b_k_size_, b_n_size_}, b_type_, dnnl::memory::desc original_b_md({b_k_size_, b_n_size_}, b_type_,
{b_k_stride_, b_n_stride_}); {b_k_stride_, b_n_stride_});
#ifdef __aarch64__
// dummy M size for prepacking weights
// Prepacking weights improves performance and avoids runtime reorders
constexpr dnnl_dim_t kProbeM = 128;
#else
constexpr dnnl_dim_t kProbeM = DNNL_RUNTIME_DIM_VAL;
#endif
prepack_weight(args.b_ptr, original_b_md, prepack_weight(args.b_ptr, original_b_md,
create_primitive_desc( create_primitive_desc(
MSizeCacheKey{.a_m_size = DNNL_RUNTIME_DIM_VAL, MSizeCacheKey{.a_m_size = kProbeM,
.use_bias = false, .use_bias = false,
.bias_type = dnnl::memory::data_type::undef}, .bias_type = dnnl::memory::data_type::undef},
true) /*first_time=*/true)
.weights_desc()); .weights_desc());
init_runtime_memory_cache(args); init_runtime_memory_cache(args);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment