Unverified Commit cc9f27bb authored by Susnato Dhar, committed by GitHub

Remove padding_masks from `gpt_bigcode`. (#27348)

Update modeling_gpt_bigcode.py
parent 8c91f15a
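
Note: after this change the attention forward methods no longer accept `**kwargs`, so padding information must be supplied through `attention_mask` rather than the removed `padding_mask` keyword. A minimal sketch of the expected call pattern (the checkpoint name below is illustrative and not part of this commit):

```python
import torch
from transformers import AutoTokenizer, GPTBigCodeForCausalLM

# Illustrative checkpoint; any GPTBigCode checkpoint behaves the same way.
tokenizer = AutoTokenizer.from_pretrained("bigcode/gpt_bigcode-santacoder")
model = GPTBigCodeForCausalLM.from_pretrained("bigcode/gpt_bigcode-santacoder")

inputs = tokenizer("def hello_world():", return_tensors="pt")

with torch.no_grad():
    # Masking is conveyed via `attention_mask`; with `**kwargs` gone from the
    # attention forward, a stray `padding_mask` argument is now rejected
    # instead of being silently remapped.
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )
```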
@@ -235,16 +235,10 @@ class GPTBigCodeAttention(nn.Module):
         encoder_attention_mask: Optional[torch.Tensor] = None,
         use_cache: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ) -> Union[
         Tuple[torch.Tensor, Optional[torch.Tensor]],
         Tuple[torch.Tensor, Optional[torch.Tensor], Tuple[torch.Tensor, ...]],
     ]:
-        if "padding_mask" in kwargs:
-            logger.warning_once(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
         if encoder_hidden_states is not None:
             if not hasattr(self, "q_attn") or not self.is_cross_attention:
                 raise ValueError(
@@ -308,19 +302,10 @@ class GPTBigCodeFlashAttention2(GPTBigCodeAttention):
         encoder_attention_mask: Optional[torch.Tensor] = None,
         use_cache: Optional[bool] = False,
         output_attentions: Optional[bool] = False,
-        **kwargs,
     ) -> Union[
         Tuple[torch.Tensor, Optional[torch.Tensor]],
         Tuple[torch.Tensor, Optional[torch.Tensor], Tuple[torch.Tensor, ...]],
     ]:
-        if "padding_mask" in kwargs:
-            logger.warning_once(
-                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
-            )
-
-            # overwrite attention_mask with padding_mask
-            attention_mask = kwargs.pop("padding_mask")
-
         if encoder_hidden_states is not None:
             if not hasattr(self, "q_attn") or not self.is_cross_attention:
                 raise ValueError(