Unverified commit 574a5384 authored by Younes Belkada, committed by GitHub

[`FA-2`] Revert suggestion that broke FA2 fine-tuning with quantized models (#26916)

revert
parent caa0ff0b
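For context: `getattr(obj, name, default)` evaluates its `default` argument eagerly, before the attribute lookup happens, so the reverted one-liner touched `self.q_proj.weight` (or `self.query_key_value.weight`) even when `_pre_quantization_dtype` was present on the config. A minimal sketch of that failure mode, assuming a quantized or adapter-wrapped projection whose bare `.weight` access raises; `Config` and `QuantizedProj` below are illustrative stand-ins, not code from this diff:

```python
import torch


class QuantizedProj:
    """Stand-in for a quantized/adapter-wrapped projection layer whose
    bare ``.weight`` access raises (an assumption for illustration)."""

    @property
    def weight(self):
        raise AttributeError("weight is not directly accessible on this wrapper")


class Config:
    _pre_quantization_dtype = torch.float16


config, q_proj = Config(), QuantizedProj()

# Restored pattern: ``q_proj.weight`` is never touched when the
# config attribute exists, so this works on the wrapped layer.
if hasattr(config, "_pre_quantization_dtype"):
    target_dtype = config._pre_quantization_dtype
else:
    target_dtype = q_proj.weight.dtype
print(target_dtype)  # torch.float16

# Reverted one-liner: Python evaluates the default argument first,
# so this raises even though ``_pre_quantization_dtype`` is present.
try:
    target_dtype = getattr(config, "_pre_quantization_dtype", q_proj.weight.dtype)
except AttributeError as exc:
    print(f"getattr one-liner failed: {exc}")
```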
@@ -614,7 +614,10 @@ class FalconFlashAttention2(FalconAttention):
         input_dtype = query_layer.dtype
         if input_dtype == torch.float32:
             # Handle the case where the model is quantized
-            target_dtype = getattr(self.config, "_pre_quantization_dtype", self.query_key_value.weight.dtype)
+            if hasattr(self.config, "_pre_quantization_dtype"):
+                target_dtype = self.config._pre_quantization_dtype
+            else:
+                target_dtype = self.query_key_value.weight.dtype
             logger.warning_once(
                 f"The input hidden states seems to be silently casted in float32, this might be related to"
@@ -476,7 +476,10 @@ class LlamaFlashAttention2(LlamaAttention):
         input_dtype = query_states.dtype
         if input_dtype == torch.float32:
             # Handle the case where the model is quantized
-            target_dtype = getattr(self.config, "_pre_quantization_dtype", self.q_proj.weight.dtype)
+            if hasattr(self.config, "_pre_quantization_dtype"):
+                target_dtype = self.config._pre_quantization_dtype
+            else:
+                target_dtype = self.q_proj.weight.dtype
             logger.warning_once(
                 f"The input hidden states seems to be silently casted in float32, this might be related to"
@@ -409,7 +409,10 @@ class MistralFlashAttention2(MistralAttention):
         input_dtype = query_states.dtype
         if input_dtype == torch.float32:
             # Handle the case where the model is quantized
-            target_dtype = getattr(self.config, "_pre_quantization_dtype", self.q_proj.weight.dtype)
+            if hasattr(self.config, "_pre_quantization_dtype"):
+                target_dtype = self.config._pre_quantization_dtype
+            else:
+                target_dtype = self.q_proj.weight.dtype
             logger.warning_once(
                 f"The input hidden states seems to be silently casted in float32, this might be related to"
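All three hunks restore the same dtype-selection logic. A self-contained sketch of the upcast handling around it, with hypothetical `resolve_target_dtype` and `FakeConfig` names introduced only for illustration:

```python
import torch


def resolve_target_dtype(config, fallback_dtype: torch.dtype) -> torch.dtype:
    """Mirror the restored branch: prefer the dtype recorded before
    quantization, fall back to the projection weight's dtype."""
    if hasattr(config, "_pre_quantization_dtype"):
        return config._pre_quantization_dtype
    return fallback_dtype


class FakeConfig:
    pass


cfg = FakeConfig()
cfg._pre_quantization_dtype = torch.bfloat16

# Hidden states that were silently upcast to float32 somewhere upstream.
query_states = torch.randn(2, 8, 16, dtype=torch.float32)
if query_states.dtype == torch.float32:
    query_states = query_states.to(resolve_target_dtype(cfg, torch.float16))
print(query_states.dtype)  # torch.bfloat16
```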