[Gen, FT] Use fp32 accum for FMA

be1afaa2 · Tri Dao · f266fc72 · be1afaa2
Commit be1afaa2 authored Jan 03, 2023 by Tri Dao
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 22 deletions

csrc/ft_attention/decoder_masked_multihead_attention_template.hpp +1 -22

No files found.
--- a/csrc/ft_attention/decoder_masked_multihead_attention_template.hpp
+++ b/csrc/ft_attention/decoder_masked_multihead_attention_template.hpp
@@ -30,7 +30,7 @@
 // Below are knobs to extend FP32 accumulation for higher FP16 accuracy
 // Does not seem to affect the accuracy that much
-// #define MMHA_USE_FP32_ACUM_FOR_FMA
+#define MMHA_USE_FP32_ACUM_FOR_FMA
 // Seems to slightly improve the accuracy
 #define MMHA_USE_FP32_ACUM_FOR_OUT
@@ -271,27 +271,6 @@ struct Qk_vec_acum_fp32_<bf16_8_t> {
    using Type = Float8_;
 };
-template<>
-struct Qk_vec_acum_fp32_<uint4> {
-    using Type = Float8_;
-};
-template<>
-struct Qk_vec_acum_fp32_<__nv_bfloat16> {
-    using Type = float;
-};
-template<>
-struct Qk_vec_acum_fp32_<__nv_bfloat162> {
-    using Type = float2;
-};
-template<>
-struct Qk_vec_acum_fp32_<bf16_4_t> {
-    using Type = Float4_;
-};
-template<>
-struct Qk_vec_acum_fp32_<bf16_8_t> {
-    using Type = Float8_;
-};
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 template<typename T>