"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "ca3fc36de355985040ebcf992ef3d5dc9ebd5e41"
Unverified Commit dd52804f authored by Sylvain Gugger, committed by GitHub

Remove deprecated (#8604)



* Remove old deprecated arguments
Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>

* Remove needless imports

* Fix tests
Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>
parent 3095ee9d
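
The hunks below only delete backward-compatibility shims; the replacement names have been the documented API for a while (`labels` instead of `masked_lm_labels`/`lm_labels`, `past_key_values` instead of `past`/`decoder_cached_states`/`decoder_past_key_values`, `is_split_into_words` instead of `is_pretokenized`, `evaluation_strategy` instead of `evaluate_during_training`, `model_max_length` instead of `max_len`, `is_world_process_zero()` instead of `is_world_master()`). A rough, hedged sketch of the caller-side renames — the checkpoint names are placeholders, not part of this commit:

```python
# Illustrative sketch of two of the renames whose fallbacks are removed in this commit.
from transformers import BertForMaskedLM, BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-uncased")
inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")

# old: model(**inputs, masked_lm_labels=inputs["input_ids"])
outputs = model(**inputs, labels=inputs["input_ids"])

# old: tokenizer(["Hello", "world"], is_pretokenized=True)
tokenizer(["Hello", "world"], is_split_into_words=True)
```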
@@ -138,7 +138,7 @@ class TestFinetuneTrainer(TestCasePlus):
             per_device_train_batch_size=batch_size,
             per_device_eval_batch_size=batch_size,
             predict_with_generate=True,
-            evaluate_during_training=True,
+            evaluation_strategy="steps",
             do_train=True,
             do_eval=True,
             warmup_steps=0,
@@ -179,7 +179,7 @@ class TestFinetuneTrainer(TestCasePlus):
             --per_device_eval_batch_size 4
             --learning_rate 3e-3
             --warmup_steps 8
-            --evaluate_during_training
+            --evaluation_strategy steps
             --predict_with_generate
             --logging_steps 0
             --save_steps {str(eval_steps)}
...
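For context on the test change above: the boolean `evaluate_during_training=True` maps onto the string-valued `evaluation_strategy` field of the training arguments. A minimal sketch, with a placeholder `output_dir` not taken from the diff:

```python
from transformers import TrainingArguments

# old: TrainingArguments(output_dir="out", evaluate_during_training=True)
args = TrainingArguments(output_dir="out", evaluation_strategy="steps", eval_steps=500)
```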
@@ -254,7 +254,7 @@ def main():
         trainer.save_model()
         # For convenience, we also re-save the tokenizer to the same directory,
         # so that you can share your model easily on huggingface.co/models =)
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             tokenizer.save_pretrained(training_args.output_dir)
     # Evaluation
@@ -265,7 +265,7 @@ def main():
         result = trainer.evaluate()
         output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_master():
+        if trainer.is_world_process_zero():
             with open(output_eval_file, "w") as writer:
                 logger.info("***** Eval results *****")
                 for key, value in result.items():
...
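The `is_world_master()` call sites above move to `is_world_process_zero()`, which answers the same question (is this the main process?) under the new name. A hedged sketch of the guard as it would look in such a script, assuming `trainer`, `tokenizer`, and `training_args` are already built:

```python
# Only the main process in a distributed run should write artifacts to disk.
if trainer.is_world_process_zero():  # previously: trainer.is_world_master()
    tokenizer.save_pretrained(training_args.output_dir)
```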
@@ -145,11 +145,11 @@ def squad_convert_example_to_features(
     # in the way they compute mask of added tokens.
     tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower()
     sequence_added_tokens = (
-        tokenizer.max_len - tokenizer.max_len_single_sentence + 1
+        tokenizer.model_max_length - tokenizer.max_len_single_sentence + 1
         if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET
-        else tokenizer.max_len - tokenizer.max_len_single_sentence
+        else tokenizer.model_max_length - tokenizer.max_len_single_sentence
     )
-    sequence_pair_added_tokens = tokenizer.max_len - tokenizer.max_len_sentences_pair
+    sequence_pair_added_tokens = tokenizer.model_max_length - tokenizer.max_len_sentences_pair
     span_doc_tokens = all_doc_tokens
     while len(spans) * doc_stride < len(all_doc_tokens):
...
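`tokenizer.max_len` is gone in favour of `tokenizer.model_max_length`, which the SQuAD feature conversion above now reads. A quick sketch of the related attributes (checkpoint name is just an example):

```python
from transformers import BertTokenizerFast

tok = BertTokenizerFast.from_pretrained("bert-base-uncased")
print(tok.model_max_length)         # replaces tok.max_len; 512 for this checkpoint
print(tok.max_len_single_sentence)  # model_max_length minus special tokens added to one sequence
print(tok.max_len_sentences_pair)   # model_max_length minus special tokens added to a pair
```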
@@ -16,7 +16,6 @@
 import math
 import os
-import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple
@@ -742,7 +741,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`):
@@ -753,8 +751,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
             Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
             (see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence
             A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A).
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         Returns:
@@ -773,14 +769,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
             >>> sop_logits = outputs.sop_logits
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.albert(
@@ -898,23 +886,13 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
             config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
             (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.albert(
...
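With the `**kwargs` escape hatch removed from the ALBERT heads above, the old `masked_lm_labels` keyword is no longer silently remapped; it fails like any unknown argument. A minimal sketch, assuming the `albert-base-v2` checkpoint:

```python
from transformers import AlbertForMaskedLM, AlbertTokenizer

tok = AlbertTokenizer.from_pretrained("albert-base-v2")
model = AlbertForMaskedLM.from_pretrained("albert-base-v2")
batch = tok("Paris is the capital of [MASK].", return_tensors="pt")

outputs = model(**batch, labels=batch["input_ids"])      # supported
# model(**batch, masked_lm_labels=batch["input_ids"])    # now: TypeError (unexpected keyword argument)
```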
@@ -15,7 +15,6 @@
 """PyTorch BART model, ported from the fairseq repo."""
 import math
 import random
-import warnings
 from typing import Dict, List, Optional, Tuple
 import numpy as np
@@ -529,7 +528,6 @@ class BartDecoder(nn.Module):
         output_attentions=False,
         output_hidden_states=False,
         return_dict=True,
-        **unused,
     ):
         """
         Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
@@ -551,18 +549,6 @@ class BartDecoder(nn.Module):
             - hidden states
             - attentions
         """
-        if "decoder_cached_states" in unused:
-            warnings.warn(
-                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_cached_states")
-        if "decoder_past_key_values" in unused:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_past_key_values")
         # check attention mask and invert
         if encoder_padding_mask is not None:
@@ -873,14 +859,7 @@ class BartModel(PretrainedBartModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
-        if "decoder_past_key_values" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_values")
         if decoder_input_ids is None:
             use_cache = False
@@ -1006,7 +985,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **unused,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -1034,24 +1012,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
             >>> tokenizer.decode(predictions).split()
             >>> # ['good', 'great', 'all', 'really', 'very']
         """
-        if "lm_labels" in unused:
-            warnings.warn(
-                "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = unused.pop("lm_labels")
-        if "decoder_cached_states" in unused:
-            warnings.warn(
-                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_cached_states")
-        if "decoder_past_key_values" in unused:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_past_key_values")
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         if labels is not None:
...
@@ -896,7 +896,6 @@ class BertForPreTraining(BertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`):
@@ -928,13 +927,6 @@ class BertForPreTraining(BertPreTrainedModel):
             >>> prediction_logits = outputs.prediction_logits
             >>> seq_relationship_logits = outputs.seq_relationship_logits
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.bert(
@@ -1136,24 +1128,13 @@ class BertForMaskedLM(BertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
             config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
             (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task."
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
...
@@ -15,9 +15,6 @@
 # limitations under the License.
 """ PyTorch CTRL model."""
-import warnings
 import numpy as np
 import torch
 import torch.nn as nn
@@ -369,15 +366,7 @@ class CTRLModel(CTRLPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
-        if "past" in kwargs:
-            warnings.warn(
-                "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("past")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         use_cache = use_cache if use_cache is not None else self.config.use_cache
@@ -542,7 +531,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -550,13 +538,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
             ``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
             ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
         """
-        if "past" in kwargs:
-            warnings.warn(
-                "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("past")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         transformer_outputs = self.transformer(
...
@@ -20,7 +20,6 @@
 import copy
 import math
-import warnings
 import numpy as np
 import torch
@@ -526,23 +525,13 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
             config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
             (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         dlbrt_output = self.distilbert(
...
@@ -16,7 +16,6 @@
 import math
 import os
-import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple
@@ -1000,23 +999,13 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
             config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
             (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         generator_hidden_states = self.electra(
...
@@ -29,7 +29,6 @@
 import math
 import random
-import warnings
 from typing import Any, Dict, List, Optional, Tuple
 import torch
@@ -618,7 +617,6 @@ class FSMTDecoder(nn.Module):
         output_attentions=False,
         output_hidden_states=False,
         return_dict=True,
-        **unused,
     ):
         """
         Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
@@ -640,19 +638,6 @@ class FSMTDecoder(nn.Module):
             - hidden states
             - attentions
         """
-        if "decoder_cached_states" in unused:
-            warnings.warn(
-                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_cached_states")
-        if "decoder_past_key_values" in unused:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = unused.pop("decoder_past_key_values")
         # check attention mask and invert
         if encoder_padding_mask is not None:
             encoder_padding_mask = invert_mask(encoder_padding_mask)
@@ -933,15 +918,7 @@ class FSMTModel(PretrainedFSMTModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
-        if "decoder_past_key_values" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_values")
         if decoder_input_ids is None:
             use_cache = False
@@ -1071,7 +1048,6 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **unused,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
...
@@ -16,7 +16,6 @@
 """PyTorch OpenAI GPT-2 model."""
 import os
-import warnings
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -528,16 +527,7 @@ class GPT2Model(GPT2PreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
-        if "past" in kwargs:
-            warnings.warn(
-                "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("past")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -758,7 +748,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -766,13 +755,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
             ``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
             ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
         """
-        if "past" in kwargs:
-            warnings.warn(
-                "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("past")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         transformer_outputs = self.transformer(
@@ -900,8 +882,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
             Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
             num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
             `input_ids` above)
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         Return:
@@ -930,19 +910,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
             >>> mc_logits = outputs.mc_logits
         """
-        if "lm_labels" in kwargs:
-            warnings.warn(
-                "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("lm_labels")
-        if "past" in kwargs:
-            warnings.warn(
-                "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("past")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         transformer_outputs = self.transformer(
...
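For the GPT-2 changes above, the cached-state keyword is `past_key_values` on both input and output; the `past` alias is no longer accepted. A hedged incremental-decoding sketch with the `gpt2` checkpoint:

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

first = model(tok("Hello, my dog", return_tensors="pt").input_ids, use_cache=True)
# old: model(next_ids, past=first.past_key_values)
second = model(tok(" is", return_tensors="pt").input_ids, past_key_values=first.past_key_values)
```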
@@ -17,7 +17,6 @@
 import json
 import os
-import warnings
 from functools import lru_cache
 from typing import Optional, Tuple
@@ -293,13 +292,6 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         return vocab_file, merge_file
     def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
-        if "is_pretokenized" in kwargs:
-            warnings.warn(
-                "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
-                FutureWarning,
-            )
-            is_split_into_words = kwargs.pop("is_pretokenized")
         add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
         if is_split_into_words or add_prefix_space:
             text = " " + text
...
@@ -16,7 +16,6 @@
 import json
-import warnings
 from typing import Optional, Tuple
 from tokenizers import pre_tokenizers
@@ -151,13 +150,6 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         self.add_prefix_space = add_prefix_space
     def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
-        if "is_pretokenized" in kwargs:
-            warnings.warn(
-                "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
-                FutureWarning,
-            )
-            is_split_into_words = kwargs.pop("is_pretokenized")
         is_split_into_words = kwargs.get("is_split_into_words", False)
         assert self.add_prefix_space or not is_split_into_words, (
             f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
@@ -167,14 +159,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         return super()._batch_encode_plus(*args, **kwargs)
     def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
-        if "is_pretokenized" in kwargs:
-            warnings.warn(
-                "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
-                FutureWarning,
-            )
-            is_split_into_words = kwargs.pop("is_pretokenized")
-        else:
-            is_split_into_words = kwargs.get("is_split_into_words", False)
+        is_split_into_words = kwargs.get("is_split_into_words", False)
         assert self.add_prefix_space or not is_split_into_words, (
             f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
...
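The fast GPT-2 tokenizer above now only understands `is_split_into_words`; the `is_pretokenized` spelling is rejected. A sketch (note the `add_prefix_space=True` requirement that the assertion in the hunk enforces):

```python
from transformers import GPT2TokenizerFast

tok = GPT2TokenizerFast.from_pretrained("gpt2", add_prefix_space=True)
# old: tok(["Hello", "world"], is_pretokenized=True)
enc = tok(["Hello", "world"], is_split_into_words=True)
print(enc.tokens())
```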
@@ -15,7 +15,6 @@
 """PyTorch Longformer model. """
 import math
-import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple
@@ -1509,7 +1508,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -1538,14 +1536,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
             >>> loss = outputs.loss
             >>> prediction_logits = output.logits
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.longformer(
...
@@ -1109,7 +1109,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -1119,12 +1118,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
         kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
             Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.mobilebert(
...
@@ -19,7 +19,6 @@
 import json
 import math
 import os
-import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple
@@ -645,7 +644,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
@@ -659,8 +657,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
             Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
             num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
             `input_ids` above)
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         Return:
@@ -683,13 +679,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
             >>> mc_logits = outputs.mc_logits
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-        if "lm_labels" in kwargs:
-            warnings.warn(
-                "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         transformer_outputs = self.transformer(
             input_ids,
...
@@ -302,7 +302,7 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
         **kwargs,
     ) -> BatchEncoding:
         if max_length is None:
-            max_length = self.max_len
+            max_length = self.model_max_length
         model_inputs = self(
             src_texts,
             add_special_tokens=True,
...
@@ -16,7 +16,6 @@
 """PyTorch RoBERTa model. """
 import math
-import warnings
 import torch
 import torch.nn as nn
@@ -872,7 +871,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
@@ -882,13 +880,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
         kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
             Used to hide legacy arguments that have been deprecated.
         """
-        if "masked_lm_labels" in kwargs:
-            warnings.warn(
-                "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("masked_lm_labels")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         outputs = self.roberta(
...
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Tokenization classes for RoBERTa."""
-import warnings
 from typing import List, Optional
 from ...tokenization_utils import AddedToken
@@ -251,13 +250,6 @@ class RobertaTokenizer(GPT2Tokenizer):
         return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
     def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
-        if "is_pretokenized" in kwargs:
-            warnings.warn(
-                "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
-                FutureWarning,
-            )
-            is_split_into_words = kwargs.pop("is_pretokenized")
         add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
         if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
             text = " " + text
...
@@ -18,7 +18,6 @@
 import copy
 import math
 import os
-import warnings
 import torch
 import torch.nn.functional as F
@@ -1048,7 +1047,6 @@ class T5Model(T5PreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         Returns:
@@ -1066,20 +1064,6 @@ class T5Model(T5PreTrainedModel):
             >>> last_hidden_states = outputs.last_hidden_state
         """
-        if "decoder_past_key_value_states" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_value_states")
-        if "decoder_past_key_values" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_values")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         use_cache = use_cache if use_cache is not None else self.config.use_cache
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1198,15 +1182,12 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
-        **kwargs,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
             Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[-100, 0, ...,
             config.vocab_size - 1]`. All labels set to ``-100`` are ignored (masked), the loss is only computed for
             labels in ``[0, ..., config.vocab_size]``
-        kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
-            Used to hide legacy arguments that have been deprecated.
         Returns:
@@ -1226,27 +1207,6 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
             >>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you ", return_tensors="pt").input_ids  # Batch size 1
             >>> outputs = model.generate(input_ids)
         """
-        if "lm_labels" in kwargs:
-            warnings.warn(
-                "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
-                FutureWarning,
-            )
-            labels = kwargs.pop("lm_labels")
-        if "decoder_past_key_value_states" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_value_states")
-        if "decoder_past_key_values" in kwargs:
-            warnings.warn(
-                "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
-                FutureWarning,
-            )
-            past_key_values = kwargs.pop("decoder_past_key_values")
-        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         use_cache = use_cache if use_cache is not None else self.config.use_cache
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
...
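The T5 heads follow the same pattern: training targets go through `labels` (the `lm_labels` fallback is gone) and cached decoder states through `past_key_values`. A minimal training-loss sketch with the `t5-small` checkpoint:

```python
from transformers import T5ForConditionalGeneration, T5Tokenizer

tok = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

input_ids = tok("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
labels = tok("Das Haus ist wunderbar.", return_tensors="pt").input_ids
loss = model(input_ids=input_ids, labels=labels).loss  # old: lm_labels=... now raises TypeError
```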