"include/vscode:/vscode.git/clone" did not exist on "48ae40517936a0488bc70e2ee83e8a470d8073f9"
Commit d846292c authored by letaoqin's avatar letaoqin
Browse files

rewrite save o code

parent 15e76415
...@@ -306,7 +306,6 @@ struct FusedMoeGemmPipeline_General ...@@ -306,7 +306,6 @@ struct FusedMoeGemmPipeline_General
make_tuple(number<32>{}, number<32>{}), make_tuple(number<32>{}, number<32>{}),
{0, 0}, {0, 0},
Policy::template MakeGlobalTileDistribution_O<Problem>()); Policy::template MakeGlobalTileDistribution_O<Problem>());
ignore = o_alds_win;
auto save_o = [&]() { auto save_o = [&]() {
if(blockIdx.x == 0 && (blockIdx.y == 0 || blockIdx.y == 1) && blockIdx.z == 0) if(blockIdx.x == 0 && (blockIdx.y == 0 || blockIdx.y == 1) && blockIdx.z == 0)
...@@ -314,17 +313,16 @@ struct FusedMoeGemmPipeline_General ...@@ -314,17 +313,16 @@ struct FusedMoeGemmPipeline_General
if(threadIdx.x < 64) if(threadIdx.x < 64)
{ {
auto o0 = load_tile(o_olds_win); auto o0 = load_tile(o_olds_win);
for(int step = 1; step < 4; step++) constexpr index_t thread_buffer_size = decltype(o0)::get_thread_buffer_size();
{ static_for<1, BlockShape::Repeat_K1, 1>{}([&](auto) {
move_tile_window(o_olds_win, {32, 0}); move_tile_window(o_olds_win, {32, 0});
auto o1 = load_tile(o_olds_win); auto o1 = load_tile(o_olds_win);
for(int i = 0; i < 16; i++) static_for<0, thread_buffer_size, 1>{}([&](auto i) {
{
o0.get_thread_buffer()(i) = type_convert<ODataType>( o0.get_thread_buffer()(i) = type_convert<ODataType>(
type_convert<float>(o0.get_thread_buffer()[i]) + type_convert<float>(o0.get_thread_buffer()[i]) +
type_convert<float>(o1.get_thread_buffer()[i])); type_convert<float>(o1.get_thread_buffer()[i]));
} });
} });
update_tile(o_window_, o0); update_tile(o_window_, o0);
} }
} }
......
...@@ -216,7 +216,8 @@ struct FusedMoeGemmPipelineGeneralPolicy ...@@ -216,7 +216,8 @@ struct FusedMoeGemmPipelineGeneralPolicy
typename S_::WarpTile_0>>; typename S_::WarpTile_0>>;
constexpr auto warp_gemm = GetWarpGemm0<Problem>(); constexpr auto warp_gemm = GetWarpGemm0<Problem>();
using BlockGemmPolicy = BlockGemmASmemBSmemCRegV1CustomPolicy<typename Problem::ADataType, using BlockGemmPolicy =
BlockGemmASmemBSmemCRegV1CustomPolicy<typename Problem::ADataType,
// using BlockGemmPolicy = // using BlockGemmPolicy =
// BlockGemmASmemBRegCRegV1CustomPolicy<typename // BlockGemmASmemBRegCRegV1CustomPolicy<typename
// Problem::ADataType, // Problem::ADataType,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment