"example/vscode:/vscode.git/clone" did not exist on "7f216620896909e254284e418d08f4d20f938a01"
Commit e5068768 authored by ThomasNing
Browse files

Fix the perf issue, achieving better performance than the compute pipeline

parent bbea596d
...@@ -29,8 +29,8 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s) ...@@ -29,8 +29,8 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s)
constexpr int kBlockPerCu = 1; constexpr int kBlockPerCu = 1;
// This part comes from the Codegen // This part comes from the Codegen
constexpr ck_tile::index_t M_Tile = 128; constexpr ck_tile::index_t M_Tile = 256;
constexpr ck_tile::index_t N_Tile = 128; constexpr ck_tile::index_t N_Tile = 256;
constexpr ck_tile::index_t K_Tile = 32; constexpr ck_tile::index_t K_Tile = 32;
constexpr ck_tile::index_t M_Warp = 2; constexpr ck_tile::index_t M_Warp = 2;
...@@ -69,7 +69,7 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s) ...@@ -69,7 +69,7 @@ float gemm_calc(const gemm_basic_args& args, const ck_tile::stream_config& s)
ck_tile::Default2DEpilogueProblem<AccDataType, CDataType, kPadM, kPadN>>>; ck_tile::Default2DEpilogueProblem<AccDataType, CDataType, kPadM, kPadN>>>;
using CodegenGemmTraits = using CodegenGemmTraits =
ck_tile::TileGemmTraits<kPadM, kPadN, kPadK, ALayout, BLayout, CLayout, true, 3>; ck_tile::TileGemmTraits<kPadM, kPadN, kPadK, ALayout, BLayout, CLayout, true, 2>;
using CodegenPipelineProblem = ck_tile:: using CodegenPipelineProblem = ck_tile::
GemmPipelineProblem<ADataType, BDataType, AccDataType, CodegenGemmShape, CodegenGemmTraits>; GemmPipelineProblem<ADataType, BDataType, AccDataType, CodegenGemmShape, CodegenGemmTraits>;
using CodegenGemmPipeline = using CodegenGemmPipeline =
......
...@@ -149,8 +149,8 @@ struct GemmPipelineAGmemBGmemCRegV1 ...@@ -149,8 +149,8 @@ struct GemmPipelineAGmemBGmemCRegV1
const BDramBlockWindowTmp& b_dram_block_window_tmp, const BDramBlockWindowTmp& b_dram_block_window_tmp,
const BElementFunction& b_element_func, const BElementFunction& b_element_func,
index_t num_loop, index_t num_loop,
void* p_smem_0, void* __restrict__ p_smem_0,
void* p_smem_1) void* __restrict__ p_smem_1)
{ {
static_assert( static_assert(
std::is_same_v<ADataType, remove_cvref_t<typename ADramBlockWindowTmp::DataType>> && std::is_same_v<ADataType, remove_cvref_t<typename ADramBlockWindowTmp::DataType>> &&
...@@ -363,8 +363,8 @@ struct GemmPipelineAGmemBGmemCRegV1 ...@@ -363,8 +363,8 @@ struct GemmPipelineAGmemBGmemCRegV1
CK_TILE_DEVICE static auto run(const ADramBlockWindowTmp& a_dram_block_window_tmp, CK_TILE_DEVICE static auto run(const ADramBlockWindowTmp& a_dram_block_window_tmp,
const BDramBlockWindowTmp& b_dram_block_window_tmp, const BDramBlockWindowTmp& b_dram_block_window_tmp,
index_t num_loop, index_t num_loop,
void* p_smem_0, void* __restrict__ p_smem_0,
void* p_smem_1) void* __restrict__ p_smem_1)
{ {
return run( return run(
a_dram_block_window_tmp, a_dram_block_window_tmp,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment