"include/vscode:/vscode.git/clone" did not exist on "c1f7d9f2a327ffaa182d4aca08c3f7f0bbb48553"
Commit 22329520 authored by Harisankar Sadasivan's avatar Harisankar Sadasivan
Browse files

added default value support for grid_size and streamk-polic selection set to -1

parent e553bcf1
......@@ -116,27 +116,38 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
};
auto f_get_default_stride =
[](std::size_t row, std::size_t col, std::size_t stride, auto layout) {
[](std::size_t row, std::size_t col, ck::index_t stride, auto layout) {
if(stride == -1)
{
// give a chance if stride is zero, return a default packed stride
// give a chance if stride is -1, return a default packed stride
if constexpr(std::is_same_v<decltype(layout), ck::tensor_layout::gemm::RowMajor>)
{
return col;
return static_cast<std::size_t>(col);
}
else
{
return row;
return static_cast<std::size_t>(row);
}
}
else
return stride;
return static_cast<std::size_t>(stride);
};
auto f_get_default_streamk_policy = [](ck::index_t streamk_sel) {
if(streamk_sel == -1)
{
return static_cast<std::size_t>(4);
}
else
return static_cast<std::size_t>(streamk_sel);
};
StrideA = f_get_default_stride(M, K, StrideA, ALayout{});
StrideB = f_get_default_stride(K, N, StrideB, BLayout{});
StrideC = f_get_default_stride(M, N, StrideC, CLayout{});
Streamk_sel = f_get_default_streamk_policy(Streamk_sel);
Tensor<ADataType> a_m_k(f_host_tensor_descriptor(M, K, StrideA, ALayout{}));
Tensor<BDataType> b_k_n(f_host_tensor_descriptor(K, N, StrideB, BLayout{}));
......
......@@ -1517,30 +1517,6 @@ struct BlockToCTileMap_GemmStreamK_v2
equiv_tiles_big = MDiv(upper_big / k_iters_per_tile.get());
equiv_tiles_little = MDiv(upper_little / k_iters_per_tile.get());
}
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
{
printf("streamk_sel=%0d,grid_size=%0d, num_tiles:%d, dp_tiles:%d, sk_tiles:%u, "
"sk_num_blocks:%d,dp_num_blocks:%d,sk_num_big_blocks:%d, "
"sk_total_iters:%d, dp_start_block_idx:%d, "
"k_iters_per_tile:%d, k_iters_per_big_block:%d, reduction_start_block_idx:%u, "
" workspace(acc float):%u\n",
streamk_sel,
grid_size,
num_tiles,
dp_tiles,
get_sk_tiles(),
sk_num_blocks,
dp_num_blocks,
sk_num_big_blocks,
sk_total_iters,
dp_start_block_idx,
k_iters_per_tile.get(),
k_iters_per_big_block,
reduction_start_block_idx,
get_workspace_size(sizeof(float)));
}
}
__host__ __device__ static constexpr index_t CalculateGridSize(index_t M, index_t N)
......
......@@ -1315,7 +1315,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
num_k_block_main_loop = __builtin_amdgcn_readfirstlane(
(a_grid_desc_ak0_m_ak1.GetLength(I0) * a_grid_desc_ak0_m_ak1.GetLength(I2)) /
KPerBlock); :AK0*KPadded/KPerBlock
KPerBlock);
blockwise_gemm_pipeline.template Run<HasMainKBlockLoop, TailNum>(
a_grid_desc_ak0_m_ak1,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment