Commit c0264b8f authored by Jing Zhang
Browse files

fixed

parent 510d6464
...@@ -234,7 +234,6 @@ struct GridwiseGemmMultipleD_xdl_splitk_cshuffle ...@@ -234,7 +234,6 @@ struct GridwiseGemmMultipleD_xdl_splitk_cshuffle
make_tuple(Sequence<0, 2>{}, Sequence<1>{})); make_tuple(Sequence<0, 2>{}, Sequence<1>{}));
} }
// prefer this to be called on host
__host__ __device__ static auto CalculateMPadded(index_t M) __host__ __device__ static auto CalculateMPadded(index_t M)
{ {
return math::integer_least_multiple(M, MPerBlock); return math::integer_least_multiple(M, MPerBlock);
...@@ -748,8 +747,8 @@ struct GridwiseGemmMultipleD_xdl_splitk_cshuffle ...@@ -748,8 +747,8 @@ struct GridwiseGemmMultipleD_xdl_splitk_cshuffle
GridwiseGemmPipeline_Selector<PipelineVer, NumGemmKPrefetchStage, LoopSched>(); GridwiseGemmPipeline_Selector<PipelineVer, NumGemmKPrefetchStage, LoopSched>();
const index_t num_k_block_main_loop = const index_t num_k_block_main_loop =
__builtin_amdgcn_readfirstlane((a_grid_desc_kbatch_ak0_m_ak1.GetLength(I0) * __builtin_amdgcn_readfirstlane((a_grid_desc_kbatch_ak0_m_ak1.GetLength(I1) *
a_grid_desc_kbatch_ak0_m_ak1.GetLength(I2)) / a_grid_desc_kbatch_ak0_m_ak1.GetLength(I3)) /
KPerBlock); KPerBlock);
gridwise_gemm_pipeline.template Run<HasMainKBlockLoop>(a_grid_desc_kbatch_ak0_m_ak1, gridwise_gemm_pipeline.template Run<HasMainKBlockLoop>(a_grid_desc_kbatch_ak0_m_ak1,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment