Unverified Commit c1f8d53c authored by feli's avatar feli Committed by GitHub
Browse files

[Ck_tile] hot fix, fix rpcf param setting err (#1657)


Co-authored-by: default avatardummycoderfe <noplydummmycoder@163.com>
parent efd92615
...@@ -121,7 +121,7 @@ struct Layernorm2dFwdPipelineOnePass ...@@ -121,7 +121,7 @@ struct Layernorm2dFwdPipelineOnePass
auto [mean, var] = block_welford(acc, cur_count, max_count); auto [mean, var] = block_welford(acc, cur_count, max_count);
block_welford_sync(mean, var, cur_count); block_welford_sync(mean, var, cur_count);
block_welford_cross_warp_sync(mean, var, cur_count, smem); block_welford_cross_warp_sync(mean, var, cur_count, smem);
block_tile_welford_post_scale_var(var, cur_count); block_tile_welford_post_scale_var(var, cur_count, constant<kFastFDiv>{});
// compute inv-std // compute inv-std
auto inv_std = tile_elementwise_in( auto inv_std = tile_elementwise_in(
......
...@@ -35,6 +35,7 @@ struct Layernorm2dFwdPipelineTwoPass ...@@ -35,6 +35,7 @@ struct Layernorm2dFwdPipelineTwoPass
static constexpr bool kNeedCrossWarpSync = Problem::kNeedCrossWarpSync; static constexpr bool kNeedCrossWarpSync = Problem::kNeedCrossWarpSync;
static constexpr bool kPadM = false; // TODO - BlockLayernorm2dFwdProblem::kPadM static constexpr bool kPadM = false; // TODO - BlockLayernorm2dFwdProblem::kPadM
static constexpr bool kPadN = Problem::Traits::kPadN; static constexpr bool kPadN = Problem::Traits::kPadN;
static constexpr bool kFastFDiv = Problem::Traits::kFastFDiv;
static constexpr auto kFusedAdd = Problem::Traits::kFusedAdd; static constexpr auto kFusedAdd = Problem::Traits::kFusedAdd;
static constexpr auto kFusedQuant = Problem::Traits::kFusedQuant; static constexpr auto kFusedQuant = Problem::Traits::kFusedQuant;
...@@ -137,15 +138,22 @@ struct Layernorm2dFwdPipelineTwoPass ...@@ -137,15 +138,22 @@ struct Layernorm2dFwdPipelineTwoPass
block_welford_sync(mean, var, cur_count); block_welford_sync(mean, var, cur_count);
block_welford_cross_warp_sync(mean, var, cur_count, smem); block_welford_cross_warp_sync(mean, var, cur_count, smem);
block_tile_welford_post_scale_var(var, cur_count); block_tile_welford_post_scale_var(var, cur_count, constant<kFastFDiv>{});
// compute inv-std // compute inv-std
auto inv_std = tile_elementwise_in( auto inv_std = tile_elementwise_in(
[&](const auto& v_) { [&](const auto& v_) {
return type_convert<ComputeDataType>(1.0f) / (sqrt(v_ + epsilon)); if(kFastFDiv && std::is_same_v<ComputeDataType, float>)
{
return type_convert<ComputeDataType>(1.0f) *
__builtin_amdgcn_rcpf(sqrt(v_ + epsilon));
}
else
{
return type_convert<ComputeDataType>(1.0f) / sqrt(v_ + epsilon);
}
}, },
var); var);
if constexpr(kSaveMean) if constexpr(kSaveMean)
store_tile(mean_window, cast_tile<MeanDataType>(mean)); store_tile(mean_window, cast_tile<MeanDataType>(mean));
if constexpr(kSaveInvStd) if constexpr(kSaveInvStd)
......
...@@ -47,8 +47,11 @@ struct BlockWelford ...@@ -47,8 +47,11 @@ struct BlockWelford
auto x = ck_tile::type_convert<ComputeDataType>(x_tensor[in_dstr_idx]); auto x = ck_tile::type_convert<ComputeDataType>(x_tensor[in_dstr_idx]);
welford_update( welford_update(mean_tensor(out_dstr_idx),
mean_tensor(out_dstr_idx), var_tensor(out_dstr_idx), x, cur_count_); var_tensor(out_dstr_idx),
x,
cur_count_,
constant<kFastFDiv>{});
}); });
} }
}); });
...@@ -159,7 +162,8 @@ struct BlockWelfordSync ...@@ -159,7 +162,8 @@ struct BlockWelfordSync
v_local_count, v_local_count,
v_remote_mean, v_remote_mean,
v_remote_var, v_remote_var,
v_remote_count); v_remote_count,
constant<kFastFDiv>{});
}); });
} }
}); });
...@@ -307,7 +311,8 @@ struct BlockWelfordCrossWarpSync ...@@ -307,7 +311,8 @@ struct BlockWelfordCrossWarpSync
v_local_count, v_local_count,
v_remote_mean, v_remote_mean,
v_remote_var, v_remote_var,
v_remote_count); v_remote_count,
constant<kFastFDiv>{});
}); });
mean_tensor.get_thread_buffer()(i_0) = v_local_mean; mean_tensor.get_thread_buffer()(i_0) = v_local_mean;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment