"scripts/build-all-components.sh" did not exist on "fcdba4f3eedbf9a169ee8b31ea45bfe75023dd36"
Commit be1afaa2 authored by Tri Dao
Browse files

[Gen, FT] Use fp32 accum for FMA

parent f266fc72
......@@ -30,7 +30,7 @@
// Compile-time knobs that extend FP32 accumulation for higher FP16 accuracy.
//
// MMHA_USE_FP32_ACUM_FOR_FMA: use FP32 accumulation for FMA.
// Does not seem to affect the accuracy that much.
// (This knob was previously left commented out, i.e. disabled.)
#define MMHA_USE_FP32_ACUM_FOR_FMA
// MMHA_USE_FP32_ACUM_FOR_OUT: presumably FP32 accumulation for the output -- confirm at the use site.
// Seems to slightly improve the accuracy.
#define MMHA_USE_FP32_ACUM_FOR_OUT
......@@ -271,27 +271,6 @@ struct Qk_vec_acum_fp32_<bf16_8_t> {
using Type = Float8_;
};
// Explicit specialization of Qk_vec_acum_fp32_ for uint4: the member alias
// `Type` resolves to Float8_.
// NOTE(review): presumably uint4 is used here as a container for 8 packed
// 16-bit values, hence an 8-wide float type -- confirm against the callers.
template<>
struct Qk_vec_acum_fp32_<uint4> {
using Type = Float8_;
};
// Explicit specialization of Qk_vec_acum_fp32_ for a single __nv_bfloat16:
// the member alias `Type` resolves to float.
template<>
struct Qk_vec_acum_fp32_<__nv_bfloat16> {
using Type = float;
};
// Explicit specialization of Qk_vec_acum_fp32_ for __nv_bfloat162:
// the member alias `Type` resolves to float2.
template<>
struct Qk_vec_acum_fp32_<__nv_bfloat162> {
using Type = float2;
};
// Explicit specialization of Qk_vec_acum_fp32_ for bf16_4_t:
// the member alias `Type` resolves to Float4_.
template<>
struct Qk_vec_acum_fp32_<bf16_4_t> {
using Type = Float4_;
};
// Explicit specialization of Qk_vec_acum_fp32_ for bf16_8_t:
// the member alias `Type` resolves to Float8_.
// NOTE(review): a specialization for bf16_8_t also appears immediately before
// this run of specializations; if both copies end up in the same translation
// unit this is a redefinition -- confirm which copy is meant to remain.
template<>
struct Qk_vec_acum_fp32_<bf16_8_t> {
using Type = Float8_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
template<typename T>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment