Commit 1ebc21d4 authored by wangshaojie6
Browse files

Fix n2 compute error: use (warpSize / MPerXdl) instead of n3 in the n2_i term of nstartgroup

parent 336a7065
...@@ -790,7 +790,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle ...@@ -790,7 +790,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
const index_t nstartxdl = nstart + n0_i * NPerRepeat; const index_t nstartxdl = nstart + n0_i * NPerRepeat;
const index_t acc_idx_n0 = acc_idx_m0 + n0_i * n2 * n4; const index_t acc_idx_n0 = acc_idx_m0 + n0_i * n2 * n4;
static_for<0, n2, 1>{}([&](auto n2_i) { static_for<0, n2, 1>{}([&](auto n2_i) {
const index_t nstartgroup = nstartxdl + thread_n_cluster_id * n4 + n2_i * n3 * n4; const index_t nstartgroup = nstartxdl + thread_n_cluster_id * n4 + n2_i * (warpSize / MPerXdl) * n4;
const index_t acc_idx_n2 = acc_idx_n0 + n2_i * n4; const index_t acc_idx_n2 = acc_idx_n0 + n2_i * n4;
static_for<0, n4, 1>{}([&](auto n4_i) { static_for<0, n4, 1>{}([&](auto n4_i) {
const index_t n_global = nstartgroup + n4_i; const index_t n_global = nstartgroup + n4_i;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment