"vscode:/vscode.git/clone" did not exist on "2c9b7c533575ea887fcf0a8faecdee05a10ae4b6"
Commit d846292c authored by letaoqin's avatar letaoqin
Browse files

rewrite save_o code

parent 15e76415
......@@ -306,7 +306,6 @@ struct FusedMoeGemmPipeline_General
make_tuple(number<32>{}, number<32>{}),
{0, 0},
Policy::template MakeGlobalTileDistribution_O<Problem>());
ignore = o_alds_win;
auto save_o = [&]() {
if(blockIdx.x == 0 && (blockIdx.y == 0 || blockIdx.y == 1) && blockIdx.z == 0)
......@@ -314,17 +313,16 @@ struct FusedMoeGemmPipeline_General
if(threadIdx.x < 64)
{
auto o0 = load_tile(o_olds_win);
for(int step = 1; step < 4; step++)
{
constexpr index_t thread_buffer_size = decltype(o0)::get_thread_buffer_size();
static_for<1, BlockShape::Repeat_K1, 1>{}([&](auto) {
move_tile_window(o_olds_win, {32, 0});
auto o1 = load_tile(o_olds_win);
for(int i = 0; i < 16; i++)
{
static_for<0, thread_buffer_size, 1>{}([&](auto i) {
o0.get_thread_buffer()(i) = type_convert<ODataType>(
type_convert<float>(o0.get_thread_buffer()[i]) +
type_convert<float>(o1.get_thread_buffer()[i]));
}
}
});
});
update_tile(o_window_, o0);
}
}
......
......@@ -216,7 +216,8 @@ struct FusedMoeGemmPipelineGeneralPolicy
typename S_::WarpTile_0>>;
constexpr auto warp_gemm = GetWarpGemm0<Problem>();
using BlockGemmPolicy = BlockGemmASmemBSmemCRegV1CustomPolicy<typename Problem::ADataType,
using BlockGemmPolicy =
BlockGemmASmemBSmemCRegV1CustomPolicy<typename Problem::ADataType,
// using BlockGemmPolicy =
// BlockGemmASmemBRegCRegV1CustomPolicy<typename
// Problem::ADataType,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment