Unverified Commit a23819ed authored by David Yang, committed by GitHub

Clean up deprecation warnings (#19654)

* Clean up deprecation warnings

Notes:
Some strings in tests were changed to raw strings, which changes the literal content of those strings as they are fed to whatever consumes them.
Test cases that exercised `past` in the `past`/`past_key_values` switch were changed or removed, since passing `past` now warns about its impending removal.
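A minimal usage sketch of what the rename means for callers, assuming a stock TF GPT-2 checkpoint (the model name, prompt, and follow-up token below are illustrative only):

    import tensorflow as tf
    from transformers import GPT2Tokenizer, TFGPT2LMHeadModel

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = TFGPT2LMHeadModel.from_pretrained("gpt2")

    inputs = tokenizer("Hello", return_tensors="tf")
    out = model(**inputs, use_cache=True)

    # Feeding the cache back through `past=...` now triggers a deprecation warning;
    # the keyword argument is renamed to `past_key_values`.
    next_ids = tf.constant([[tokenizer.eos_token_id]])
    out = model(next_ids, past_key_values=out.past_key_values)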

* Add PILImageResampling abstraction for PIL.Image.Resampling
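The abstraction itself is a small compatibility shim added to `transformers.image_utils` (see the hunk below): Pillow 9.1.0 moved the resampling constants into the `PIL.Image.Resampling` enum, so `PILImageResampling` points at whichever namespace the installed Pillow provides, and callers reference `PILImageResampling.BILINEAR` / `BICUBIC` / `LANCZOS` instead of the deprecated `PIL.Image.*` constants:

    import PIL.Image
    from packaging import version

    # Pillow >= 9.1.0 exposes resampling filters on the Image.Resampling enum;
    # older releases keep them as module-level constants on PIL.Image.
    if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
        PILImageResampling = PIL.Image.Resampling
    else:
        PILImageResampling = PIL.Image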
parent af556a09
@@ -23,6 +23,7 @@ import torch
 from PIL import Image
 from torch import nn
+from transformers.image_utils import PILImageResampling
 from utils import img_tensorize
@@ -59,7 +60,7 @@ class ResizeShortestEdge:
             if img.dtype == np.uint8:
                 pil_image = Image.fromarray(img)
-                pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
+                pil_image = pil_image.resize((neww, newh), PILImageResampling.BILINEAR)
                 img = np.asarray(pil_image)
             else:
                 img = img.permute(2, 0, 1).unsqueeze(0)  # 3, 0, 1)  # hw(c) -> nchw
......
@@ -23,6 +23,7 @@ import torch
 from PIL import Image
 from torch import nn
+from transformers.image_utils import PILImageResampling
 from utils import img_tensorize
@@ -59,7 +60,7 @@ class ResizeShortestEdge:
             if img.dtype == np.uint8:
                 pil_image = Image.fromarray(img)
-                pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
+                pil_image = pil_image.resize((neww, newh), PILImageResampling.BILINEAR)
                 img = np.asarray(pil_image)
             else:
                 img = img.permute(2, 0, 1).unsqueeze(0)  # 3, 0, 1)  # hw(c) -> nchw
......
@@ -815,23 +815,23 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
             special_tokens_mask = [
                 self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
             ]
-            special_tokens_mask = np.array(special_tokens_mask, dtype=np.bool)
+            special_tokens_mask = np.array(special_tokens_mask, dtype=bool)
         else:
-            special_tokens_mask = special_tokens_mask.astype(np.bool)
+            special_tokens_mask = special_tokens_mask.astype(bool)
         probability_matrix[special_tokens_mask] = 0
         # Numpy doesn't have bernoulli, so we use a binomial with 1 trial
-        masked_indices = np.random.binomial(1, probability_matrix, size=probability_matrix.shape).astype(np.bool)
+        masked_indices = np.random.binomial(1, probability_matrix, size=probability_matrix.shape).astype(bool)
         labels[~masked_indices] = -100  # We only compute loss on masked tokens
         # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
-        indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(np.bool) & masked_indices
+        indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(bool) & masked_indices
         inputs[indices_replaced] = self.tokenizer.mask_token_id
         # 10% of the time, we replace masked input tokens with random word
         # indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
         indices_random = (
-            np.random.binomial(1, 0.5, size=labels.shape).astype(np.bool) & masked_indices & ~indices_replaced
+            np.random.binomial(1, 0.5, size=labels.shape).astype(bool) & masked_indices & ~indices_replaced
         )
         random_words = np.random.randint(
             low=0, high=len(self.tokenizer), size=np.count_nonzero(indices_random), dtype=np.int64
@@ -1086,12 +1086,12 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
         labels = np.copy(inputs)
         # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
-        masked_indices = mask_labels.astype(np.bool)
+        masked_indices = mask_labels.astype(bool)
         special_tokens_mask = [
             self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
         ]
-        masked_indices[np.array(special_tokens_mask, dtype=np.bool)] = 0
+        masked_indices[np.array(special_tokens_mask, dtype=bool)] = 0
         if self.tokenizer._pad_token is not None:
             padding_mask = labels == self.tokenizer.pad_token_id
             masked_indices[padding_mask] = 0
@@ -1099,13 +1099,13 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
         labels[~masked_indices] = -100  # We only compute loss on masked tokens
         # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
-        indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(np.bool) & masked_indices
+        indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(bool) & masked_indices
         inputs[indices_replaced] = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)
         # 10% of the time, we replace masked input tokens with random word
         # indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
         indices_random = (
-            np.random.binomial(1, 0.5, size=labels.shape).astype(np.bool) & masked_indices & ~indices_replaced
+            np.random.binomial(1, 0.5, size=labels.shape).astype(bool) & masked_indices & ~indices_replaced
         )
         random_words = np.random.randint(low=0, high=len(self.tokenizer), size=labels.shape, dtype=np.int64)
         inputs[indices_random] = random_words[indices_random]
@@ -1363,7 +1363,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
         labels = tf.identity(inputs)
         # Creating the mask and target_mapping tensors
-        masked_indices = np.full(labels.shape.as_list(), 0, dtype=np.bool)
+        masked_indices = np.full(labels.shape.as_list(), 0, dtype=bool)
         labels_shape = tf.shape(labels)
         target_mapping = np.zeros((labels_shape[0], labels_shape[1], labels_shape[1]), dtype=np.float32)
@@ -1472,7 +1472,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
         labels = np.copy(inputs)
         # Creating the mask and target_mapping tensors
-        masked_indices = np.full(labels.shape, 0, dtype=np.bool)
+        masked_indices = np.full(labels.shape, 0, dtype=bool)
         target_mapping = np.zeros((labels.shape[0], labels.shape[1], labels.shape[1]), dtype=np.float32)
         for i in range(labels.shape[0]):
@@ -1497,7 +1497,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
         special_tokens_mask = np.array(
             [self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()],
-            dtype=np.bool,
+            dtype=bool,
         )
         masked_indices[special_tokens_mask] = 0
         if self.tokenizer._pad_token is not None:
......
@@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, Union
 import numpy as np
+from transformers.image_utils import PILImageResampling
 from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available
@@ -216,7 +217,7 @@ def get_resize_output_image_size(
 def resize(
     image,
     size: Tuple[int, int],
-    resample=PIL.Image.BILINEAR,
+    resample=PILImageResampling.BILINEAR,
     data_format: Optional[ChannelDimension] = None,
     return_numpy: bool = True,
 ) -> np.ndarray:
@@ -228,7 +229,7 @@ def resize(
             The image to resize.
         size (`Tuple[int, int]`):
             The size to use for resizing the image.
-        resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
             The filter to user for resampling.
         data_format (`ChannelDimension`, *optional*):
             The channel dimension format of the output image. If `None`, will use the inferred format from the input.
......
@@ -17,6 +17,7 @@ import os
 from typing import TYPE_CHECKING, List, Tuple, Union
 import numpy as np
+from packaging import version
 import requests
@@ -34,6 +35,10 @@ if is_vision_available():
     import PIL.Image
     import PIL.ImageOps
+    if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
+        PILImageResampling = PIL.Image.Resampling
+    else:
+        PILImageResampling = PIL.Image
 if TYPE_CHECKING:
     if is_torch_available():
@@ -364,7 +369,7 @@ class ImageFeatureExtractionMixin:
                 If `size` is an int and `default_to_square` is `True`, then image will be resized to (size, size). If
                 `size` is an int and `default_to_square` is `False`, then smaller edge of the image will be matched to
                 this number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
-            resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
+            resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
                 The filter to user for resampling.
             default_to_square (`bool`, *optional*, defaults to `True`):
                 How to convert `size` when it is a single int. If set to `True`, the `size` will be converted to a
@@ -380,7 +385,7 @@ class ImageFeatureExtractionMixin:
         Returns:
             image: A resized `PIL.Image.Image`.
         """
-        resample = resample if resample is not None else PIL.Image.BILINEAR
+        resample = resample if resample is not None else PILImageResampling.BILINEAR
         self._ensure_format_supported(image)
......
@@ -22,8 +22,6 @@ from functools import partial
 from pickle import UnpicklingError
 from typing import Any, Dict, Set, Tuple, Union
-import numpy as np
 import flax.linen as nn
 import jax
 import jax.numpy as jnp
@@ -82,9 +80,9 @@ def dtype_byte_size(dtype):
     4
     ```
     """
-    if dtype == np.bool:
+    if dtype == bool:
         return 1 / 8
-    bit_search = re.search("[^\d](\d+)$", dtype.name)
+    bit_search = re.search(r"[^\d](\d+)$", dtype.name)
     if bit_search is None:
         raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
     bit_size = int(bit_search.groups()[0])
......
@@ -605,7 +605,7 @@ def dtype_byte_size(dtype):
     """
     if dtype == tf.bool:
         return 1 / 8
-    bit_search = re.search("[^\d](\d+)$", dtype.name)
+    bit_search = re.search(r"[^\d](\d+)$", dtype.name)
     if bit_search is None:
         raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
     bit_size = int(bit_search.groups()[0])
......
@@ -32,6 +32,7 @@ from transformers import (
     BeitForMaskedImageModeling,
     BeitForSemanticSegmentation,
 )
+from transformers.image_utils import PILImageResampling
 from transformers.utils import logging
@@ -269,7 +270,9 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
         ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
         image = Image.open(ds[0]["file"])
     else:
-        feature_extractor = BeitFeatureExtractor(size=config.image_size, resample=Image.BILINEAR, do_center_crop=False)
+        feature_extractor = BeitFeatureExtractor(
+            size=config.image_size, resample=PILImageResampling.BILINEAR, do_center_crop=False
+        )
         image = prepare_img()
     encoding = feature_extractor(images=image, return_tensors="pt")
......
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import (
     IMAGENET_STANDARD_MEAN,
@@ -50,10 +52,11 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
             integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
             set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
             image is padded with 0's and then center cropped.
@@ -77,7 +80,7 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
         self,
         do_resize=True,
         size=256,
-        resample=Image.BICUBIC,
+        resample=PILImageResampling.BICUBIC,
         do_center_crop=True,
         crop_size=224,
         do_normalize=True,
......
@@ -548,7 +548,7 @@ class SudachiTokenizer:
             raise ValueError("Invalid sudachi_split_mode is specified.")
         self.sudachi = dictionary.Dictionary(
-            config_path=sudachi_config_path, resource_dir=sudachi_resource_dir, dict_type=sudachi_dict_type
+            config_path=sudachi_config_path, resource_dir=sudachi_resource_dir, dict=sudachi_dict_type
         ).create(self.split_mode)
     def tokenize(self, text, never_split=None, **kwargs):
......
@@ -19,6 +19,8 @@ from typing import List, Optional, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
 from ...utils import TensorType, logging
@@ -39,10 +41,11 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             Whether to resize the input to a certain `size`.
         size (`int`, *optional*, defaults to 224):
             Resize the input to the given size. Only has an effect if `do_resize` is set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
             image is padded with 0's and then center cropped.
@@ -64,7 +67,7 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
         self,
         do_resize=True,
         size=224,
-        resample=Image.BICUBIC,
+        resample=PILImageResampling.BICUBIC,
         do_center_crop=True,
         crop_size=224,
         do_normalize=True,
......
@@ -19,6 +19,8 @@ from typing import Optional, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import (
     IMAGENET_DEFAULT_MEAN,
@@ -47,10 +49,11 @@ class ConvNextFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
             Resize the input to the given size. If 384 or larger, the image is resized to (`size`, `size`). Else, the
             smaller edge of the image will be matched to int(`size`/ `crop_pct`), after which the image is cropped to
             `size`. Only has an effect if `do_resize` is set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         crop_pct (`float`, *optional*):
             The percentage of the image to crop. If `None`, then a cropping percentage of 224 / 256 is used. Only has
             an effect if `do_resize` is set to `True` and `size` < 384.
@@ -68,7 +71,7 @@ class ConvNextFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
         self,
         do_resize=True,
         size=224,
-        resample=Image.BICUBIC,
+        resample=PILImageResampling.BICUBIC,
         crop_pct=None,
         do_normalize=True,
         image_mean=None,
......
@@ -143,7 +143,7 @@ def _compute_mask_indices(
     )
     # SpecAugment mask to fill
-    spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
+    spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
     spec_aug_mask_idxs = []
     max_num_masked_span = compute_num_masked_span(sequence_length)
......
@@ -19,6 +19,8 @@ from typing import Optional, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import (
     IMAGENET_DEFAULT_MEAN,
@@ -47,10 +49,11 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
             integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
             set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
             image is padded with 0's and then center cropped.
@@ -70,7 +73,7 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
         self,
         do_resize=True,
         size=256,
-        resample=Image.BICUBIC,
+        resample=PILImageResampling.BICUBIC,
         do_center_crop=True,
         crop_size=224,
         do_normalize=True,
......
@@ -25,6 +25,7 @@ from PIL import Image
 import requests
 from huggingface_hub import hf_hub_download
 from transformers import BeitConfig, BeitFeatureExtractor, BeitForImageClassification, BeitForMaskedImageModeling
+from transformers.image_utils import PILImageResampling
 from transformers.utils import logging
@@ -170,7 +171,9 @@ def convert_dit_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
     model.load_state_dict(state_dict)
     # Check outputs on an image
-    feature_extractor = BeitFeatureExtractor(size=config.image_size, resample=Image.BILINEAR, do_center_crop=False)
+    feature_extractor = BeitFeatureExtractor(
+        size=config.image_size, resample=PILImageResampling.BILINEAR, do_center_crop=False
+    )
     image = prepare_img()
     encoding = feature_extractor(images=image, return_tensors="pt")
......
@@ -19,6 +19,8 @@ from typing import Optional, Tuple, Union
 import numpy as np
 from PIL import Image, ImageOps
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import (
     IMAGENET_STANDARD_MEAN,
@@ -46,10 +48,11 @@ class DonutFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
         size (`Tuple(int)`, *optional*, defaults to [1920, 2560]):
             Resize the shorter edge of the input to the minimum value of the given size. Should be a tuple of (width,
             height). Only has an effect if `do_resize` is set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_thumbnail (`bool`, *optional*, defaults to `True`):
             Whether to thumbnail the input to the given `size`.
         do_align_long_axis (`bool`, *optional*, defaults to `False`):
@@ -71,7 +74,7 @@ class DonutFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
         self,
         do_resize=True,
         size=[1920, 2560],
-        resample=Image.BILINEAR,
+        resample=PILImageResampling.BILINEAR,
         do_thumbnail=True,
         do_align_long_axis=False,
         do_pad=True,
......
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import (
     IMAGENET_STANDARD_MEAN,
@@ -55,10 +57,11 @@ class DPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             `True`.
         keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
             Whether to keep the aspect ratio of the input. Only has an effect if `do_resize` is set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
-            if `do_resize` is set to `True`.
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_normalize (`bool`, *optional*, defaults to `True`):
             Whether or not to normalize the input with mean and standard deviation.
         image_mean (`List[int]`, defaults to `[0.5, 0.5, 0.5]`):
@@ -75,7 +78,7 @@ class DPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
         size=384,
         keep_aspect_ratio=False,
         ensure_multiple_of=1,
-        resample=Image.BILINEAR,
+        resample=PILImageResampling.BILINEAR,
         do_normalize=True,
         image_mean=None,
         image_std=None,
......
@@ -22,6 +22,8 @@ from typing import Any, List, Optional, Tuple, Union
 import numpy as np
 from PIL import Image
+from transformers.image_utils import PILImageResampling
 from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
 from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
 from ...utils import TensorType, logging
@@ -129,9 +131,11 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
             Whether to resize the input to a certain `size`.
         size (`int`, *optional*, defaults to 224):
             Resize the input to the given size. Only has an effect if `do_resize` is set to `True`.
-        resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
+        resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
             image is padded with 0's and then center cropped.
@@ -160,9 +164,11 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
         codebook_size (`int`, *optional*, defaults to 224):
             Resize the input for codebook to the given size. Only has an effect if `codebook_do_resize` is set to
             `True`.
-        codebook_resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
-            An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
-            `PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
+        codebook_resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
+            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
+            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
+            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
+            to `True`.
         codebook_do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to crop the input for codebook at the center. If the input size is smaller than
             `codebook_crop_size` along any edge, the image is padded with 0's and then center cropped.
@@ -184,7 +190,7 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
         self,
         do_resize: bool = True,
         size: Union[int, Tuple[int, int]] = 224,
-        resample: int = Image.BICUBIC,
+        resample: int = PILImageResampling.BICUBIC,
         do_center_crop: bool = True,
         crop_size: Union[int, Tuple[int, int]] = 224,
         do_normalize: bool = True,
@@ -200,7 +206,7 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
         # Codebook related params
         codebook_do_resize: bool = True,
         codebook_size: bool = 112,
-        codebook_resample: int = Image.LANCZOS,
+        codebook_resample: int = PILImageResampling.LANCZOS,
         codebook_do_center_crop: bool = True,
         codebook_crop_size: int = 112,
         codebook_do_map_pixels: bool = True,
......
@@ -19,6 +19,7 @@ from typing import List, Optional, Union
 import numpy as np
 import PIL.Image
+from transformers.image_utils import PILImageResampling
 from transformers.utils.generic import TensorType
 from ...image_processing_utils import BaseImageProcessor, BatchFeature
@@ -41,7 +42,7 @@ class GLPNImageProcessor(BaseImageProcessor):
         size_divisor (`int`, *optional*, defaults to 32):
             Set the class default for the `size_divisor` parameter. When `do_resize` is `True`, images are resized so
             their height and width are rounded down to the closest multiple of `size_divisor`.
-        resample (`PIL.Image` resampling filter, *optional*, defaults to `PIL.Image.BILINEAR`):
+        resample (`PIL.Image` resampling filter, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
             Set the class default for `resample`. Defines the resampling filter to use if resizing the image.
         do_rescale (`bool`, *optional*, defaults to `True`):
             Set the class default for the `do_rescale` parameter. Controls whether or not to apply the scaling factor
@@ -54,7 +55,7 @@ class GLPNImageProcessor(BaseImageProcessor):
         self,
         do_resize: bool = True,
         size_divisor: int = 32,
-        resample=PIL.Image.BILINEAR,
+        resample=PILImageResampling.BILINEAR,
         do_rescale: bool = True,
         **kwargs
     ) -> None:
@@ -79,7 +80,7 @@ class GLPNImageProcessor(BaseImageProcessor):
                 The image is resized so its height and width are rounded down to the closest multiple of
                 `size_divisor`.
             resample:
-                `PIL.Image` resampling filter to use when resizing the image e.g. `PIL.Image.BILINEAR`.
+                `PIL.Image` resampling filter to use when resizing the image e.g. `PIL.Image.Resampling.BILINEAR`.
             data_format (`ChannelDimension`, *optional*):
                 The channel dimension format for the output image. If `None`, the channel dimension format of the input
                 image is used. Can be one of:
@@ -141,8 +142,8 @@ class GLPNImageProcessor(BaseImageProcessor):
                 When `do_resize` is `True`, images are resized so their height and width are rounded down to the
                 closest multiple of `size_divisor`.
             resample (`PIL.Image` resampling filter, *optional*, defaults to `self.resample`):
-                `PIL.Image` resampling filter to use if resizing the image e.g. `PIL.Image.BILINEAR`. Only has an
-                effect if `do_resize` is set to `True`.
+                `PIL.Image` resampling filter to use if resizing the image e.g. `PIL.Image.Resampling.BILINEAR`. Only
+                has an effect if `do_resize` is set to `True`.
             do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                 Whether or not to apply the scaling factor (to make pixel values floats between 0. and 1.).
             return_tensors (`str`, *optional*):
......
@@ -353,7 +353,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
-        past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -378,11 +378,11 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
         else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")
-        if past is None:
+        if past_key_values is None:
             past_length = 0
-            past = [None] * len(self.h)
+            past_key_values = [None] * len(self.h)
         else:
-            past_length = shape_list(past[0][0])[-2]
+            past_length = shape_list(past_key_values[0][0])[-2]
         if position_ids is None:
             position_ids = tf.expand_dims(tf.range(past_length, input_shape[-1] + past_length), axis=0)
@@ -473,7 +473,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
         all_attentions = () if output_attentions else None
         all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
         all_hidden_states = () if output_hidden_states else None
-        for i, (block, layer_past) in enumerate(zip(self.h, past)):
+        for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
             if output_hidden_states:
                 all_hidden_states = all_hidden_states + (tf.reshape(hidden_states, output_shape),)
@@ -650,19 +650,20 @@ GPT2_START_DOCSTRING = r"""
 GPT2_INPUTS_DOCSTRING = r"""
     Args:
         input_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, input_ids_length)`):
-            `input_ids_length` = `sequence_length` if `past` is `None` else `past[0].shape[-2]` (`sequence_length` of
-            input past key value states). Indices of input sequence tokens in the vocabulary.
-            If `past` is used, only input IDs that do not have their past calculated should be passed as `input_ids`.
+            `input_ids_length` = `sequence_length` if `past_key_values` is `None` else `past_key_values[0].shape[-2]`
+            (`sequence_length` of input past key value states). Indices of input sequence tokens in the vocabulary.
+            If `past_key_values` is used, only input IDs that do not have their past calculated should be passed as
+            `input_ids`.
             Indices can be obtained using [`GPT2Tokenizer`]. See [`PreTrainedTokenizer.__call__`] and
             [`PreTrainedTokenizer.encode`] for details.
             [What are input IDs?](../glossary#input-ids)
-        past (`List[tf.Tensor]` of length `config.n_layers`):
+        past_key_values (`List[tf.Tensor]` of length `config.n_layers`):
             Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model (see
-            `past` output below). Can be used to speed up sequential decoding. The token ids which have their past
-            given to this model should not be passed as input ids as they have already been computed.
+            `past_key_values` output below). Can be used to speed up sequential decoding. The token ids which have
+            their past given to this model should not be passed as input ids as they have already been computed.
         attention_mask (`tf.Tensor` or `Numpy array` of shape `(batch_size, sequence_length)`, *optional*):
             Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
@@ -734,7 +735,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
-        past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -759,7 +760,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
-        past (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
+        past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
             contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
             If `past` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have
             their past key value states given to this model) of shape `(batch_size, 1)` instead of all
@@ -771,7 +772,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
         outputs = self.transformer(
             input_ids=input_ids,
-            past=past,
+            past_key_values=past_key_values,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
             position_ids=position_ids,
@@ -847,7 +848,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
             "input_ids": inputs,
             "attention_mask": attention_mask,
             "position_ids": position_ids,
-            "past": past,
+            "past_key_values": past,
             "use_cache": use_cache,
             "token_type_ids": token_type_ids,
         }
@@ -863,7 +864,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
-        past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -889,7 +890,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
             - 1 for tokens that are **not masked**,
             - 0 for tokens that are **masked**.
-        past (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
+        past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
             contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
             If `past` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have
             their past key value states given to this model) of shape `(batch_size, 1)` instead of all
@@ -904,7 +905,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
         transformer_outputs = self.transformer(
             input_ids=input_ids,
-            past=past,
+            past_key_values=past_key_values,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
             position_ids=position_ids,
@@ -982,7 +983,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
-        past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
         attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -1041,7 +1042,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
         flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
         transformer_outputs = self.transformer(
             input_ids=flat_input_ids,
-            past=past,
+            past_key_values=past_key_values,
             attention_mask=flat_attention_mask,
             token_type_ids=flat_token_type_ids,
             position_ids=flat_position_ids,
@@ -1138,7 +1139,7 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
     def call(
         self,
         input_ids: Optional[TFModelInputType] = None,
-        past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
+        past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
         token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
         position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
@@ -1158,7 +1159,7 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
         """
         transformer_outputs = self.transformer(
             input_ids=input_ids,
-            past=past,
+            past_key_values=past_key_values,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
             position_ids=position_ids,
......