Unverified commit f0fd73a2, authored by Sylvain Gugger and committed by GitHub

Document check copies (#25291)

* Document check copies better and add tests

* Include header in check for copies

* Manual fixes

* Try autofix

* Fixes

* Clean tests

* Finalize doc

* Remove debug print

* More fixes
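The `# Copied from` markers touched throughout this diff are the anchors used by `utils/check_copies.py`: the body of the marked object must stay identical to the referenced source, optionally after applying the `with Old->New` rename suffix, and `make fix-copies` rewrites any copy that has drifted. A minimal sketch of the marker syntax (`MyModel` is a hypothetical name used only to illustrate; it is not part of this diff):

from torch import nn

# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->MyModel
class MyModelSelfOutput(nn.Module):
    # the body below must match BertSelfOutput exactly, modulo the Bert->MyModel rename
    ...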
@@ -246,7 +246,7 @@ class EfficientFormerConvMlp(nn.Module):
 # Copied from transformers.models.convnext.modeling_convnext.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
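Every `drop_path` hunk in this commit aligns a local copy with the same stochastic-depth helper. A minimal sketch of the shared body, written from memory of the beit implementation, so treat it as illustrative rather than the verbatim source:

import torch


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    # one Bernoulli draw per sample, broadcast across all remaining dimensions
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    return input.div(keep_prob) * random_tensor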
@@ -667,7 +667,7 @@ class TFEsmEncoder(Layer):
 # Copied from transformers.models.bert.modeling_tf_bert.TFBertPooler with Bert->Esm
-class TFEsmPooler(Layer):
+class TFEsmPooler(tf.keras.layers.Layer):
     def __init__(self, config: EsmConfig, **kwargs):
         super().__init__(**kwargs)
...
@@ -286,7 +286,7 @@ class FocalNetPatchEmbeddings(nn.Module):
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(input, drop_prob=0.0, training=False, scale_by_keep=True):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -52,8 +52,8 @@ GLPN_PRETRAINED_MODEL_ARCHIVE_LIST = [
 ]
-# Copied from transformers.models.segformer.modeling_segformer.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False):
+# Copied from transformers.models.beit.modeling_beit.drop_path
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -272,7 +272,7 @@ class GPTBigCodeMLP(nn.Module):
         self.dropout = nn.Dropout(config.resid_pdrop)
     # Copied from transformers.models.gpt2.modeling_gpt2.GPT2MLP.forward
-    def forward(self, hidden_states: Optional[Tuple[torch.Tensor]]) -> torch.Tensor:
+    def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -> torch.FloatTensor:
         hidden_states = self.c_fc(hidden_states)
         hidden_states = self.act(hidden_states)
         hidden_states = self.c_proj(hidden_states)
...
@@ -30,7 +30,8 @@ from ...utils import logging
 if TYPE_CHECKING:
-    from transformers.pipelines.conversational import Conversation
+    from ...pipelines.conversational import Conversation
+    from ...tokenization_utils_base import TextInput
 logger = logging.get_logger(__name__)
@@ -168,7 +169,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         return vocab
     # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize
-    def tokenize(self, text, **kwargs) -> List[str]:
+    def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
         # Replace the SPIECE_UNDERLINE with a space to make sure SPIECE_UNDERLINE is only used at
         # the beginning of the text
         if not self.legacy:
@@ -176,7 +177,7 @@ class LlamaTokenizer(PreTrainedTokenizer):
         return super().tokenize(text, **kwargs)
     # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer._tokenize
-    def _tokenize(self, text):
+    def _tokenize(self, text, **kwargs):
         """
         Returns a tokenized string.
...
@@ -56,7 +56,7 @@ remat = nn_partitioning.remat
 # Copied from transformers.models.bart.modeling_flax_bart.shift_tokens_right
-def shift_tokens_right(input_ids: np.array, pad_token_id: int, decoder_start_token_id: int) -> np.ndarray:
+def shift_tokens_right(input_ids: jnp.array, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
     """
     Shift input ids one token to the right.
     """
...
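The Flax hunks above and below retype `shift_tokens_right` so the copies match the BART original exactly. A sketch of what that helper does, assuming the standard right-shift-with-decoder-start behavior (details from memory, not copied from this diff):

import jax.numpy as jnp


def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
    # prepend the decoder start token and drop the last token
    shifted_input_ids = jnp.zeros_like(input_ids)
    shifted_input_ids = shifted_input_ids.at[:, 1:].set(input_ids[:, :-1])
    shifted_input_ids = shifted_input_ids.at[:, 0].set(decoder_start_token_id)
    # replace any -100 label padding with the real pad token
    return jnp.where(shifted_input_ids == -100, pad_token_id, shifted_input_ids)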
@@ -227,7 +227,7 @@ def create_sinusoidal_positions(n_pos, dim):
 # Copied from transformers.models.bart.modeling_flax_bart.shift_tokens_right
-def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
+def shift_tokens_right(input_ids: jnp.array, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
     """
     Shift input ids one token to the right.
     """
...
@@ -123,7 +123,7 @@ def window_reverse(windows, window_size, height, width):
 # Copied from transformers.models.swin.modeling_swin.drop_path
-def drop_path(input, drop_prob=0.0, training=False, scale_by_keep=True):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -51,7 +51,7 @@ MGP_STR_PRETRAINED_MODEL_ARCHIVE_LIST = [
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -263,7 +263,7 @@ class NatDownsampler(nn.Module):
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(input, drop_prob=0.0, training=False, scale_by_keep=True):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -47,7 +47,6 @@ if is_torch_available():
 logger = logging.get_logger(__name__)
-# Copied from transformers.models.detr.modeling_detr._upcast
 def _upcast(t):
     # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
     if t.is_floating_point():
...
@@ -22,7 +22,7 @@ from typing import Any, Dict, Optional, Tuple, Union
 import numpy as np
 import torch
 import torch.utils.checkpoint
-from torch import nn
+from torch import Tensor, nn
 from ...activations import ACT2FN
 from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
@@ -120,7 +120,7 @@ class OwlViTOutput(ModelOutput):
 # Copied from transformers.models.detr.modeling_detr._upcast
-def _upcast(t: torch.Tensor) -> torch.Tensor:
+def _upcast(t: Tensor) -> Tensor:
     # Protects from numerical overflows in multiplications by upcasting to the equivalent higher type
     if t.is_floating_point():
         return t if t.dtype in (torch.float32, torch.float64) else t.float()
@@ -129,7 +129,7 @@ def _upcast(t: torch.Tensor) -> torch.Tensor:
 # Copied from transformers.models.detr.modeling_detr.box_area
-def box_area(boxes: torch.Tensor) -> torch.Tensor:
+def box_area(boxes: Tensor) -> Tensor:
     """
     Computes the area of a set of bounding boxes, which are specified by its (x1, y1, x2, y2) coordinates.
@@ -146,7 +146,7 @@ def box_area(boxes: torch.Tensor) -> torch.Tensor:
 # Copied from transformers.models.detr.modeling_detr.box_iou
-def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
+def box_iou(boxes1, boxes2):
     area1 = box_area(boxes1)
     area2 = box_area(boxes2)
...
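For orientation, the three DETR helpers retyped above fit together as follows. A sketch from memory (the shape comments assume the (x1, y1, x2, y2) box convention stated in the docstring, and are not text from this diff):

import torch
from torch import Tensor


def _upcast(t: Tensor) -> Tensor:
    # protect the area multiplication from overflow by widening the dtype
    if t.is_floating_point():
        return t if t.dtype in (torch.float32, torch.float64) else t.float()
    return t if t.dtype in (torch.int32, torch.int64) else t.int()


def box_area(boxes: Tensor) -> Tensor:
    boxes = _upcast(boxes)
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])


def box_iou(boxes1: Tensor, boxes2: Tensor):
    area1 = box_area(boxes1)  # (N,)
    area2 = box_area(boxes2)  # (M,)
    left_top = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # (N, M, 2)
    right_bottom = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # (N, M, 2)
    width_height = (right_bottom - left_top).clamp(min=0)
    inter = width_height[:, :, 0] * width_height[:, :, 1]  # (N, M)
    union = area1[:, None] + area2 - inter
    return inter / union, union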
@@ -210,7 +210,7 @@ PEGASUS_DECODE_INPUTS_DOCSTRING = r"""
 # Copied from transformers.models.bart.modeling_flax_bart.shift_tokens_right
-def shift_tokens_right(input_ids: np.array, pad_token_id: int, decoder_start_token_id: int) -> np.ndarray:
+def shift_tokens_right(input_ids: jnp.array, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
     """
     Shift input ids one token to the right.
     """
@@ -223,7 +223,7 @@ def shift_tokens_right(input_ids: np.array, pad_token_id: int, decoder_start_tok
 # Copied from transformers.models.marian.modeling_flax_marian.create_sinusoidal_positions
-def create_sinusoidal_positions(n_pos, dim, dtype):
+def create_sinusoidal_positions(n_pos, dim):
     position_enc = np.array([[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)] for pos in range(n_pos)])
     sentinel = dim // 2 + dim % 2
     out = np.zeros_like(position_enc)
@@ -686,9 +686,7 @@ class FlaxPegasusEncoder(nn.Module):
         self.max_source_positions = self.config.max_position_embeddings
         self.embed_scale = math.sqrt(embed_dim) if self.config.scale_embedding else 1.0
-        self.embed_positions = create_sinusoidal_positions(
-            self.config.max_position_embeddings, embed_dim, dtype=self.dtype
-        )
+        self.embed_positions = create_sinusoidal_positions(self.config.max_position_embeddings, embed_dim)
         self.layers = FlaxPegasusEncoderLayerCollection(self.config, self.dtype)
         self.layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05)
@@ -755,9 +753,7 @@ class FlaxPegasusDecoder(nn.Module):
         self.max_target_positions = self.config.max_position_embeddings
         self.embed_scale = math.sqrt(self.config.d_model) if self.config.scale_embedding else 1.0
-        self.embed_positions = create_sinusoidal_positions(
-            self.config.max_position_embeddings, embed_dim, dtype=self.dtype
-        )
+        self.embed_positions = create_sinusoidal_positions(self.config.max_position_embeddings, embed_dim)
         self.layers = FlaxPegasusDecoderLayerCollection(self.config, self.dtype)
         self.layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05)
...
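The visible lines of `create_sinusoidal_positions` match the Marian helper the comment now points to. A sketch of the complete function under that assumption (the sin/cos split after `sentinel` is reconstructed from memory, so treat it as approximate):

import numpy as np
import jax.numpy as jnp


def create_sinusoidal_positions(n_pos, dim):
    # classic sinusoidal table: the frequency depends on the feature index j
    position_enc = np.array(
        [[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)] for pos in range(n_pos)]
    )
    sentinel = dim // 2 + dim % 2
    out = np.zeros_like(position_enc)
    out[:, 0:sentinel] = np.sin(position_enc[:, 0::2])
    out[:, sentinel:] = np.cos(position_enc[:, 1::2])
    return jnp.array(out)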
@@ -50,7 +50,7 @@ POOLFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -55,8 +55,8 @@ PVT_PRETRAINED_MODEL_ARCHIVE_LIST = [
 ]
-# Copied from transformers.models.convnext.modeling_convnext.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False, scale_by_keep=True):
+# Copied from transformers.models.beit.modeling_beit.drop_path
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -84,8 +84,8 @@ class SegFormerImageClassifierOutput(ImageClassifierOutput):
     attentions: Optional[Tuple[torch.FloatTensor]] = None
-# Copied from transformers.models.convnext.modeling_convnext.drop_path
-def drop_path(input, drop_prob: float = 0.0, training: bool = False, scale_by_keep=True):
+# Copied from transformers.models.beit.modeling_beit.drop_path
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -86,7 +86,7 @@ class SwiftFormerPatchEmbedding(nn.Module):
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(x, drop_prob: float = 0.0, training: bool = False):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -380,7 +380,7 @@ class SwinPatchMerging(nn.Module):
 # Copied from transformers.models.beit.modeling_beit.drop_path
-def drop_path(input, drop_prob=0.0, training=False, scale_by_keep=True):
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...
@@ -105,8 +105,8 @@ def window_reverse(windows, window_size, height, width):
     return windows
-# Copied from transformers.models.swin.modeling_swin.drop_path
-def drop_path(input, drop_prob=0.0, training=False, scale_by_keep=True):
+# Copied from transformers.models.beit.modeling_beit.drop_path
+def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """
     Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
...