Commit 28252273 authored by letaoqin's avatar letaoqin
Browse files

first version right

parent 580d93dc
...@@ -323,7 +323,7 @@ struct FusedMoeGemmPipeline_General ...@@ -323,7 +323,7 @@ struct FusedMoeGemmPipeline_General
Policy::template MakeGlobalTileDistribution_O<Problem>()); Policy::template MakeGlobalTileDistribution_O<Problem>());
auto save_o = [&]() { auto save_o = [&]() {
//if(blockIdx.x == 0 && (blockIdx.y == 0 || blockIdx.y == 1) && blockIdx.z == 0) // if(blockIdx.x == 0 && (blockIdx.y == 0 || blockIdx.y == 1) && blockIdx.z == 0)
{ {
if(threadIdx.x < 64) if(threadIdx.x < 64)
{ {
...@@ -339,6 +339,8 @@ struct FusedMoeGemmPipeline_General ...@@ -339,6 +339,8 @@ struct FusedMoeGemmPipeline_General
}); });
}); });
update_tile(o_window_, o0); update_tile(o_window_, o0);
// restore pos
move_tile_window(o_olds_win, {-32 * (BlockShape::Repeat_K1 - 1), 0});
} }
} }
}; };
......
...@@ -24,7 +24,7 @@ struct FusedMoeGemmPipelineGeneralPolicy ...@@ -24,7 +24,7 @@ struct FusedMoeGemmPipelineGeneralPolicy
CK_TILE_HOST_DEVICE static constexpr index_t GetAsyncCopyDwords() CK_TILE_HOST_DEVICE static constexpr index_t GetAsyncCopyDwords()
{ {
// TODO: always 1 dword // TODO: always 1 dword
return 1; return 2;
} }
template <typename Problem> template <typename Problem>
...@@ -196,7 +196,7 @@ struct FusedMoeGemmPipelineGeneralPolicy ...@@ -196,7 +196,7 @@ struct FusedMoeGemmPipelineGeneralPolicy
{ {
return make_static_tile_distribution( return make_static_tile_distribution(
tile_distribution_encoding<sequence<1>, tile_distribution_encoding<sequence<1>,
tuple<sequence<1, 2, 16>, sequence<4, 8>>, tuple<sequence<1, 1, 32>, sequence<2, 16>>,
tuple<sequence<0, 1>, sequence<1, 2>>, tuple<sequence<0, 1>, sequence<1, 2>>,
tuple<sequence<0, 0>, sequence<2, 0>>, tuple<sequence<0, 0>, sequence<2, 0>>,
sequence<1, 2>, sequence<1, 2>,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment