Cohere: Fix copied from (#31213)

Update modeling_cohere.py

Cohere: Fix copied from (#31213)
Update modeling_cohere.py
924c46d4 · Younes Belkada · GitHub · 98dd8423 · 924c46d4
Unverified commit 924c46d4, authored Jun 03, 2024 by Younes Belkada; committed by GitHub on Jun 03, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 1 deletion

src/transformers/models/cohere/modeling_cohere.py src/transformers/models/cohere/modeling_cohere.py +2 -1

No files found.
--- a/src/transformers/models/cohere/modeling_cohere.py
+++ b/src/transformers/models/cohere/modeling_cohere.py
@@ -310,7 +310,7 @@ class CohereAttention(nn.Module):
        return attn_output, attn_weights, past_key_value


-# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 Llama->Cohere
+# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->Cohere
 class CohereFlashAttention2(CohereAttention):
    """
    Cohere flash attention module. This module inherits from `CohereAttention` as the weights of the module stays
@@ -326,6 +326,7 @@ class CohereFlashAttention2(CohereAttention):
        # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
        self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()

+    # Ignore copy
    def forward(
        self,
        hidden_states: torch.Tensor,