Commit 22329520 authored by Harisankar Sadasivan's avatar Harisankar Sadasivan
Browse files

added default value support for grid_size and streamk-polic selection set to -1

parent e553bcf1
...@@ -116,27 +116,38 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -116,27 +116,38 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
}; };
auto f_get_default_stride = auto f_get_default_stride =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) { [](std::size_t row, std::size_t col, ck::index_t stride, auto layout) {
if(stride == -1) if(stride == -1)
{ {
// give a chance if stride is zero, return a default packed stride // give a chance if stride is -1, return a default packed stride
if constexpr(std::is_same_v<decltype(layout), ck::tensor_layout::gemm::RowMajor>) if constexpr(std::is_same_v<decltype(layout), ck::tensor_layout::gemm::RowMajor>)
{ {
return col; return static_cast<std::size_t>(col);
} }
else else
{ {
return row; return static_cast<std::size_t>(row);
} }
} }
else else
return stride; return static_cast<std::size_t>(stride);
}; };
auto f_get_default_streamk_policy = [](ck::index_t streamk_sel) {
if(streamk_sel == -1)
{
return static_cast<std::size_t>(4);
}
else
return static_cast<std::size_t>(streamk_sel);
};
StrideA = f_get_default_stride(M, K, StrideA, ALayout{}); StrideA = f_get_default_stride(M, K, StrideA, ALayout{});
StrideB = f_get_default_stride(K, N, StrideB, BLayout{}); StrideB = f_get_default_stride(K, N, StrideB, BLayout{});
StrideC = f_get_default_stride(M, N, StrideC, CLayout{}); StrideC = f_get_default_stride(M, N, StrideC, CLayout{});
Streamk_sel = f_get_default_streamk_policy(Streamk_sel);
Tensor<ADataType> a_m_k(f_host_tensor_descriptor(M, K, StrideA, ALayout{})); Tensor<ADataType> a_m_k(f_host_tensor_descriptor(M, K, StrideA, ALayout{}));
Tensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, StrideB, BLayout{})); Tensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, StrideB, BLayout{}));
......
...@@ -1517,30 +1517,6 @@ struct BlockToCTileMap_GemmStreamK_v2 ...@@ -1517,30 +1517,6 @@ struct BlockToCTileMap_GemmStreamK_v2
equiv_tiles_big = MDiv(upper_big / k_iters_per_tile.get()); equiv_tiles_big = MDiv(upper_big / k_iters_per_tile.get());
equiv_tiles_little = MDiv(upper_little / k_iters_per_tile.get()); equiv_tiles_little = MDiv(upper_little / k_iters_per_tile.get());
} }
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
printf("streamk_sel=%0d,grid_size=%0d, num_tiles:%d, dp_tiles:%d, sk_tiles:%u, "
"sk_num_blocks:%d,dp_num_blocks:%d,sk_num_big_blocks:%d, "
"sk_total_iters:%d, dp_start_block_idx:%d, "
"k_iters_per_tile:%d, k_iters_per_big_block:%d, reduction_start_block_idx:%u, "
" workspace(acc float):%u\n",
streamk_sel,
grid_size,
num_tiles,
dp_tiles,
get_sk_tiles(),
sk_num_blocks,
dp_num_blocks,
sk_num_big_blocks,
sk_total_iters,
dp_start_block_idx,
k_iters_per_tile.get(),
k_iters_per_big_block,
reduction_start_block_idx,
get_workspace_size(sizeof(float)));
}
} }
__host__ __device__ static constexpr index_t CalculateGridSize(index_t M, index_t N) __host__ __device__ static constexpr index_t CalculateGridSize(index_t M, index_t N)
......
...@@ -1315,7 +1315,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3 ...@@ -1315,7 +1315,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
num_k_block_main_loop = __builtin_amdgcn_readfirstlane( num_k_block_main_loop = __builtin_amdgcn_readfirstlane(
(a_grid_desc_ak0_m_ak1.GetLength(I0) * a_grid_desc_ak0_m_ak1.GetLength(I2)) / (a_grid_desc_ak0_m_ak1.GetLength(I0) * a_grid_desc_ak0_m_ak1.GetLength(I2)) /
KPerBlock); :AK0*KPadded/KPerBlock KPerBlock);
blockwise_gemm_pipeline.template Run<HasMainKBlockLoop, TailNum>( blockwise_gemm_pipeline.template Run<HasMainKBlockLoop, TailNum>(
a_grid_desc_ak0_m_ak1, a_grid_desc_ak0_m_ak1,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment