Fix n2 stride in nstartgroup computation (use warpSize / MPerXdl instead of n3)

1ebc21d4 · wangshaojie6 · 336a7065 · 1ebc21d4
Commit 1ebc21d4 authored Sep 14, 2022 by wangshaojie6
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp +1 -1

No files found.
--- a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp
@@ -790,7 +790,7 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
                    const index_t nstartxdl = nstart + n0_i * NPerRepeat;
                    const index_t acc_idx_n0 = acc_idx_m0 + n0_i * n2 * n4;
                    static_for<0, n2, 1>{}([&](auto n2_i) {
-                        const index_t nstartgroup = nstartxdl + thread_n_cluster_id * n4 + n2_i * n3 * n4;
+                        const index_t nstartgroup = nstartxdl + thread_n_cluster_id * n4 + n2_i * (warpSize / MPerXdl) * n4;
                        const index_t acc_idx_n2 = acc_idx_n0 + n2_i * n4;
                        static_for<0, n4, 1>{}([&](auto n4_i) {
                            const index_t n_global = nstartgroup + n4_i;