Unverified Commit 4ddd9de9 authored by Tim Dettmers, committed by GitHub

Bugfix: LLaMA layer norm incorrectly changes input type and consumes lots of memory (#23535)



* Fixed bug where LLaMA layer norm would change input type.

* make fix-copies

---------
Co-authored-by: younesbelkada <younesbelkada@gmail.com>
parent fe34486f
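For context, a minimal sketch of the failure mode described in the commit message. OldRMSNorm is a hypothetical stand-in (not a transformers class) that keeps only the dtype-relevant lines of the pre-fix forward: when the norm weight is kept in float32 (as it is with 8-bit loading or mixed-precision setups), the half-precision input is promoted to float32 and never cast back, so every consumer downstream sees a changed dtype and larger activations.

import torch
import torch.nn as nn

class OldRMSNorm(nn.Module):
    """Pre-fix forward pass, reduced to the dtype-relevant lines (illustrative only)."""

    def __init__(self, hidden_size, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states):
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        # float16 activation * float32 rsqrt promotes the activation to float32
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
        # the old cast only fires when the *weight* is half precision,
        # so a float32 weight leaves the output in float32
        if self.weight.dtype in [torch.float16, torch.bfloat16]:
            hidden_states = hidden_states.to(self.weight.dtype)
        return self.weight * hidden_states

norm = OldRMSNorm(4096)                                  # weight stays float32
x = torch.randn(2, 8, 4096, dtype=torch.float16)
print(norm(x).dtype)                                     # torch.float32 -- input type changed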
@@ -81,14 +81,11 @@ class LlamaRMSNorm(nn.Module):
         self.variance_epsilon = eps
 
     def forward(self, hidden_states):
+        input_dtype = hidden_states.dtype
         variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
         hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
 
-        # convert into half-precision if necessary
-        if self.weight.dtype in [torch.float16, torch.bfloat16]:
-            hidden_states = hidden_states.to(self.weight.dtype)
-
-        return self.weight * hidden_states
+        return (self.weight * hidden_states).to(input_dtype)
 
 
 class LlamaRotaryEmbedding(torch.nn.Module):
...
@@ -91,14 +91,11 @@ class OpenLlamaRMSNorm(nn.Module):
         self.variance_epsilon = eps
 
     def forward(self, hidden_states):
+        input_dtype = hidden_states.dtype
         variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
         hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
 
-        # convert into half-precision if necessary
-        if self.weight.dtype in [torch.float16, torch.bfloat16]:
-            hidden_states = hidden_states.to(self.weight.dtype)
-
-        return self.weight * hidden_states
+        return (self.weight * hidden_states).to(input_dtype)
 
 
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->OpenLlama
...
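As a quick sanity check, assuming a transformers install that includes this commit, the patched norm should now hand activations back in the caller's dtype even though the variance is still accumulated in float32:

import torch
from transformers.models.llama.modeling_llama import LlamaRMSNorm

norm = LlamaRMSNorm(4096)                                # weight is initialised in float32
x = torch.randn(2, 8, 4096, dtype=torch.float16)
out = norm(x)
print(out.dtype)                                         # torch.float16 -- input dtype preserved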