Unverified commit 7293fdc5, authored by Yih-Dar and committed by GitHub

Deprecate `TransfoXL` (#27607)



* fix

* fix

* trigger

* Apply suggestions from code review
Co-authored-by: Lysandre Debut <hi@lysand.re>

* tic

* revert

* revert

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Lysandre Debut <hi@lysand.re>
parent 623432dc
......@@ -16,6 +16,29 @@ rendered properly in your Markdown viewer.
# Transformer XL
<Tip warning={true}>
This model is in maintenance mode only, so we won't accept any new PRs changing its code. This model was deprecated due to security issues linked to `pickle.load`.
We recommend switching to more recent models for improved security.
In case you would still like to use `TransfoXL` in your experiments, we recommend using the [Hub checkpoint](https://huggingface.co/transfo-xl-wt103) with a specific revision to ensure you are downloading safe files from the Hub:
```python
from transformers import TransfoXLTokenizer, TransfoXLLMHeadModel
checkpoint = 'transfo-xl-wt103'
revision = '40a186da79458c9f9de846edfaea79c412137f97'
tokenizer = TransfoXLTokenizer.from_pretrained(checkpoint, revision=revision)
model = TransfoXLLMHeadModel.from_pretrained(checkpoint, revision=revision)
```
If you run into any issues running this model, please reinstall the last version that supported this model: v4.35.0.
You can do so by running the following command: `pip install -U transformers==4.35.0`.
</Tip>
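As an optional extra precaution (a sketch, not part of the original tip; it assumes `huggingface_hub`, which `transformers` already depends on), you can download the pinned revision first and audit the files before loading anything:

```python
# Sketch: pre-download the pinned revision and inspect it before loading.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    "transfo-xl-wt103",
    revision="40a186da79458c9f9de846edfaea79c412137f97",
)
print(local_dir)  # audit the files here, then pass `local_dir` to `from_pretrained`
```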
<div class="flex flex-wrap space-x-1">
<a href="https://huggingface.co/models?filter=transfo-xl">
<img alt="Models" src="https://img.shields.io/badge/All_model_pages-transfo--xl-blueviolet">
......@@ -79,13 +102,13 @@ TransformerXL does **not** work with *torch.nn.DataParallel* due to a bug in PyT
## TransfoXL specific outputs
[[autodoc]] models.transfo_xl.modeling_transfo_xl.TransfoXLModelOutput
[[autodoc]] models.deprecated.transfo_xl.modeling_transfo_xl.TransfoXLModelOutput
[[autodoc]] models.transfo_xl.modeling_transfo_xl.TransfoXLLMHeadModelOutput
[[autodoc]] models.deprecated.transfo_xl.modeling_transfo_xl.TransfoXLLMHeadModelOutput
[[autodoc]] models.transfo_xl.modeling_tf_transfo_xl.TFTransfoXLModelOutput
[[autodoc]] models.deprecated.transfo_xl.modeling_tf_transfo_xl.TFTransfoXLModelOutput
[[autodoc]] models.transfo_xl.modeling_tf_transfo_xl.TFTransfoXLLMHeadModelOutput
[[autodoc]] models.deprecated.transfo_xl.modeling_tf_transfo_xl.TFTransfoXLLMHeadModelOutput
<frameworkcontent>
<pt>
......
......@@ -96,20 +96,6 @@ transformers-cli convert --model_type gpt2 \
[--finetuning_task_name OPENAI_GPT2_FINETUNED_TASK]
```
## Transformer-XL
Here is an example of the process for converting a pretrained Transformer-XL model (more information [here](https://github.com/kimiyoung/transformer-xl/tree/master/tf#obtain-and-evaluate-pretrained-sota-models)):
```bash
export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint
transformers-cli convert --model_type transfo_xl \
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
[--config TRANSFO_XL_CONFIG] \
[--finetuning_task_name TRANSFO_XL_FINETUNED_TASK]
```
## XLNet
Here is an example of the process for converting a pretrained XLNet model:
......
......@@ -104,21 +104,6 @@ transformers-cli convert --model_type gpt2 \
[--finetuning_task_name OPENAI_GPT2_FINETUNED_TASK]
```
## Transformer-XL
Here is an example of the conversion process for a pretrained Transformer-XL model
(see [here](https://github.com/kimiyoung/transformer-xl/tree/master/tf#obtain-and-evaluate-pretrained-sota-models)):
```bash
export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint
transformers-cli convert --model_type transfo_xl \
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
[--config TRANSFO_XL_CONFIG] \
[--finetuning_task_name TRANSFO_XL_FINETUNED_TASK]
```
## XLNet
Here is an example of the conversion process for a pretrained XLNet model:
......
......@@ -109,20 +109,6 @@ transformers-cli convert --model_type gpt2 \
[--finetuning_task_name OPENAI_GPT2_FINETUNED_TASK]
```
## Transformer-XL
Here is an example of the conversion process for a pretrained Transformer-XL model (see [here](https://github.com/kimiyoung/transformer-xl/tree/master/tf#obtain-and-evaluate-pretrained-modelos-sota)):
```bash
export TRANSFO_XL_CHECKPOINT_FOLDER_PATH=/path/to/transfo/xl/checkpoint
transformers-cli convert --model_type transfo_xl \
--tf_checkpoint $TRANSFO_XL_CHECKPOINT_FOLDER_PATH \
--pytorch_dump_output $PYTORCH_DUMP_OUTPUT \
[--config TRANSFO_XL_CONFIG] \
[--finetuning_task_name TRANSFO_XL_FINETUNED_TASK]
```
## XLNet
Here is an example of the conversion process for a pretrained XLNet model:
......
......@@ -307,6 +307,12 @@ _import_structure = {
"TRAJECTORY_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP",
"TrajectoryTransformerConfig",
],
"models.deprecated.transfo_xl": [
"TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP",
"TransfoXLConfig",
"TransfoXLCorpus",
"TransfoXLTokenizer",
],
"models.deprecated.van": ["VAN_PRETRAINED_CONFIG_ARCHIVE_MAP", "VanConfig"],
"models.deta": ["DETA_PRETRAINED_CONFIG_ARCHIVE_MAP", "DetaConfig"],
"models.detr": ["DETR_PRETRAINED_CONFIG_ARCHIVE_MAP", "DetrConfig"],
......@@ -580,12 +586,6 @@ _import_structure = {
],
"models.timesformer": ["TIMESFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "TimesformerConfig"],
"models.timm_backbone": ["TimmBackboneConfig"],
"models.transfo_xl": [
"TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP",
"TransfoXLConfig",
"TransfoXLCorpus",
"TransfoXLTokenizer",
],
"models.trocr": [
"TROCR_PRETRAINED_CONFIG_ARCHIVE_MAP",
"TrOCRConfig",
......@@ -1661,6 +1661,17 @@ else:
"TrajectoryTransformerPreTrainedModel",
]
)
_import_structure["models.deprecated.transfo_xl"].extend(
[
"TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
"AdaptiveEmbedding",
"TransfoXLForSequenceClassification",
"TransfoXLLMHeadModel",
"TransfoXLModel",
"TransfoXLPreTrainedModel",
"load_tf_weights_in_transfo_xl",
]
)
_import_structure["models.deprecated.van"].extend(
[
"VAN_PRETRAINED_MODEL_ARCHIVE_LIST",
......@@ -2919,17 +2930,6 @@ else:
]
)
_import_structure["models.timm_backbone"].extend(["TimmBackbone"])
_import_structure["models.transfo_xl"].extend(
[
"TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
"AdaptiveEmbedding",
"TransfoXLForSequenceClassification",
"TransfoXLLMHeadModel",
"TransfoXLModel",
"TransfoXLPreTrainedModel",
"load_tf_weights_in_transfo_xl",
]
)
_import_structure["models.trocr"].extend(
["TROCR_PRETRAINED_MODEL_ARCHIVE_LIST", "TrOCRForCausalLM", "TrOCRPreTrainedModel"]
)
......@@ -3525,6 +3525,17 @@ else:
"TFDeiTPreTrainedModel",
]
)
_import_structure["models.deprecated.transfo_xl"].extend(
[
"TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFAdaptiveEmbedding",
"TFTransfoXLForSequenceClassification",
"TFTransfoXLLMHeadModel",
"TFTransfoXLMainLayer",
"TFTransfoXLModel",
"TFTransfoXLPreTrainedModel",
]
)
_import_structure["models.distilbert"].extend(
[
"TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
......@@ -3889,17 +3900,6 @@ else:
"TFTapasPreTrainedModel",
]
)
_import_structure["models.transfo_xl"].extend(
[
"TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFAdaptiveEmbedding",
"TFTransfoXLForSequenceClassification",
"TFTransfoXLLMHeadModel",
"TFTransfoXLMainLayer",
"TFTransfoXLModel",
"TFTransfoXLPreTrainedModel",
]
)
_import_structure["models.vision_encoder_decoder"].extend(["TFVisionEncoderDecoderModel"])
_import_structure["models.vision_text_dual_encoder"].extend(["TFVisionTextDualEncoderModel"])
_import_structure["models.vit"].extend(
......@@ -4552,6 +4552,12 @@ if TYPE_CHECKING:
TRAJECTORY_TRANSFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP,
TrajectoryTransformerConfig,
)
from .models.deprecated.transfo_xl import (
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
TransfoXLConfig,
TransfoXLCorpus,
TransfoXLTokenizer,
)
from .models.deprecated.van import VAN_PRETRAINED_CONFIG_ARCHIVE_MAP, VanConfig
from .models.deta import DETA_PRETRAINED_CONFIG_ARCHIVE_MAP, DetaConfig
from .models.detr import DETR_PRETRAINED_CONFIG_ARCHIVE_MAP, DetrConfig
......@@ -4812,12 +4818,6 @@ if TYPE_CHECKING:
)
from .models.timesformer import TIMESFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, TimesformerConfig
from .models.timm_backbone import TimmBackboneConfig
from .models.transfo_xl import (
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
TransfoXLConfig,
TransfoXLCorpus,
TransfoXLTokenizer,
)
from .models.trocr import TROCR_PRETRAINED_CONFIG_ARCHIVE_MAP, TrOCRConfig, TrOCRProcessor
from .models.tvlt import TVLT_PRETRAINED_CONFIG_ARCHIVE_MAP, TvltConfig, TvltFeatureExtractor, TvltProcessor
from .models.tvp import (
......@@ -5746,6 +5746,15 @@ if TYPE_CHECKING:
TrajectoryTransformerModel,
TrajectoryTransformerPreTrainedModel,
)
from .models.deprecated.transfo_xl import (
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
AdaptiveEmbedding,
TransfoXLForSequenceClassification,
TransfoXLLMHeadModel,
TransfoXLModel,
TransfoXLPreTrainedModel,
load_tf_weights_in_transfo_xl,
)
from .models.deprecated.van import (
VAN_PRETRAINED_MODEL_ARCHIVE_LIST,
VanForImageClassification,
......@@ -6774,15 +6783,6 @@ if TYPE_CHECKING:
TimesformerPreTrainedModel,
)
from .models.timm_backbone import TimmBackbone
from .models.transfo_xl import (
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
AdaptiveEmbedding,
TransfoXLForSequenceClassification,
TransfoXLLMHeadModel,
TransfoXLModel,
TransfoXLPreTrainedModel,
load_tf_weights_in_transfo_xl,
)
from .models.trocr import TROCR_PRETRAINED_MODEL_ARCHIVE_LIST, TrOCRForCausalLM, TrOCRPreTrainedModel
from .models.tvlt import (
TVLT_PRETRAINED_MODEL_ARCHIVE_LIST,
......@@ -7269,6 +7269,15 @@ if TYPE_CHECKING:
TFDeiTModel,
TFDeiTPreTrainedModel,
)
from .models.deprecated.transfo_xl import (
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFAdaptiveEmbedding,
TFTransfoXLForSequenceClassification,
TFTransfoXLLMHeadModel,
TFTransfoXLMainLayer,
TFTransfoXLModel,
TFTransfoXLPreTrainedModel,
)
from .models.distilbert import (
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFDistilBertForMaskedLM,
......@@ -7554,15 +7563,6 @@ if TYPE_CHECKING:
TFTapasModel,
TFTapasPreTrainedModel,
)
from .models.transfo_xl import (
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFAdaptiveEmbedding,
TFTransfoXLForSequenceClassification,
TFTransfoXLLMHeadModel,
TFTransfoXLMainLayer,
TFTransfoXLModel,
TFTransfoXLPreTrainedModel,
)
from .models.vision_encoder_decoder import TFVisionEncoderDecoderModel
from .models.vision_text_dual_encoder import TFVisionTextDualEncoderModel
from .models.vit import TFViTForImageClassification, TFViTModel, TFViTPreTrainedModel
......
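Each `_import_structure` change above has a mirrored edit in this `if TYPE_CHECKING:` section because the runtime imports are lazy: static type checkers never execute `_LazyModule`, so the real import paths must also appear in a block that only they evaluate. A minimal sketch of the pattern (the import shown is illustrative):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # TYPE_CHECKING is False at runtime, so this import never executes;
    # IDEs and type checkers, however, resolve the real symbol.
    from transformers.models.deprecated.transfo_xl import TransfoXLConfig  # noqa: F401
```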
......@@ -123,23 +123,6 @@ class ConvertCommand(BaseTransformersCLICommand):
)
convert_openai_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
elif self._model_type == "transfo_xl":
try:
from ..models.transfo_xl.convert_transfo_xl_original_tf_checkpoint_to_pytorch import (
convert_transfo_xl_checkpoint_to_pytorch,
)
except ImportError:
raise ImportError(IMPORT_ERROR_MESSAGE)
if "ckpt" in self._tf_checkpoint.lower():
TF_CHECKPOINT = self._tf_checkpoint
TF_DATASET_FILE = ""
else:
TF_DATASET_FILE = self._tf_checkpoint
TF_CHECKPOINT = ""
convert_transfo_xl_checkpoint_to_pytorch(
TF_CHECKPOINT, self._config, self._pytorch_dump_output, TF_DATASET_FILE
)
elif self._model_type == "gpt2":
try:
from ..models.gpt2.convert_gpt2_original_tf_checkpoint_to_pytorch import (
......@@ -179,6 +162,4 @@ class ConvertCommand(BaseTransformersCLICommand):
convert_rembert_tf_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
else:
raise ValueError(
"--model_type should be selected in the list [bert, gpt, gpt2, t5, transfo_xl, xlnet, xlm, lxmert]"
)
raise ValueError("--model_type should be selected in the list [bert, gpt, gpt2, t5, xlnet, xlm, lxmert]")
......@@ -204,7 +204,6 @@ from . import (
time_series_transformer,
timesformer,
timm_backbone,
transfo_xl,
trocr,
tvlt,
tvp,
......
......@@ -706,6 +706,8 @@ MODEL_NAMES_MAPPING = OrderedDict(
]
)
# This is tied to the processing `-` -> `_` in `model_type_to_module_name`. For example, instead of putting
# `transfo-xl` (as in `CONFIG_MAPPING_NAMES`), we should use `transfo_xl`.
DEPRECATED_MODELS = [
"bort",
"mctct",
......@@ -714,6 +716,7 @@ DEPRECATED_MODELS = [
"retribert",
"tapex",
"trajectory_transformer",
"transfo_xl",
"van",
]
......
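The comment above refers to `model_type_to_module_name`, which maps a config's `model_type` (e.g. `transfo-xl`) to the Python module that implements it; entries in `DEPRECATED_MODELS` are routed into the `deprecated` subpackage. A hedged sketch of that routing (simplified, not the exact library code):

```python
# Names in DEPRECATED_MODELS must use `_` because `-` is replaced first.
DEPRECATED = {"transfo_xl", "van"}  # abridged; the full list is DEPRECATED_MODELS above


def module_name_for(model_type: str) -> str:
    key = model_type.replace("-", "_")  # the `-` -> `_` processing the comment refers to
    if key in DEPRECATED:
        key = f"deprecated.{key}"
    return key


assert module_name_for("transfo-xl") == "deprecated.transfo_xl"
```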
......@@ -14,7 +14,7 @@
from typing import TYPE_CHECKING
from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tf_available, is_torch_available
from ....utils import OptionalDependencyNotAvailable, _LazyModule, is_tf_available, is_torch_available
_import_structure = {
......
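All of the one-dot import changes in the hunks below follow mechanically from the move into the `deprecated` subpackage: each module now lives one package level deeper, so every relative import needs one extra leading dot to reach the `transformers` root. Schematically (paths taken from this diff):

```python
# before: src/transformers/models/transfo_xl/configuration_transfo_xl.py
#     from ...configuration_utils import PretrainedConfig   # 3 dots up -> transformers/
# after:  src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py
#     from ....configuration_utils import PretrainedConfig  # 4 dots up -> transformers/
```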
......@@ -15,8 +15,8 @@
# limitations under the License.
""" Transformer XL configuration"""
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ....configuration_utils import PretrainedConfig
from ....utils import logging
logger = logging.get_logger(__name__)
......@@ -74,7 +74,7 @@ class TransfoXLConfig(PretrainedConfig):
Whether or not to use adaptive softmax.
dropout (`float`, *optional*, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
dropatt (`float`, *optional*, defaults to 0):
dropatt (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
untie_r (`boolean`, *optional*, defaults to `True`):
Whether or not to untie relative position biases.
......@@ -86,8 +86,10 @@ class TransfoXLConfig(PretrainedConfig):
Parameters initialized by N(0, init_std)
init_std (`float`, *optional*, defaults to 0.02):
Parameters initialized by N(0, init_std)
layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
The epsilon to use in the layer normalization layers
eos_token_id (`int`, *optional*, defaults to 0):
End of stream token id.
Examples:
......
......@@ -23,8 +23,8 @@ import sys
import torch
from transformers import TransfoXLConfig, TransfoXLLMHeadModel, load_tf_weights_in_transfo_xl
from transformers.models.transfo_xl import tokenization_transfo_xl as data_utils
from transformers.models.transfo_xl.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES
from transformers.models.deprecated.transfo_xl import tokenization_transfo_xl as data_utils
from transformers.models.deprecated.transfo_xl.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES
from transformers.utils import CONFIG_NAME, WEIGHTS_NAME, logging
......
......@@ -25,7 +25,7 @@ from typing import List, Optional, Tuple, Union
import numpy as np
import tensorflow as tf
from ...modeling_tf_utils import (
from ....modeling_tf_utils import (
TFModelInputType,
TFPreTrainedModel,
TFSequenceClassificationLoss,
......@@ -33,8 +33,8 @@ from ...modeling_tf_utils import (
keras_serializable,
unpack_inputs,
)
from ...tf_utils import shape_list, stable_softmax
from ...utils import (
from ....tf_utils import shape_list, stable_softmax
from ....utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
......
......@@ -20,7 +20,7 @@
import tensorflow as tf
from ...tf_utils import shape_list
from ....tf_utils import shape_list
class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
......
......@@ -25,8 +25,8 @@ import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from ...modeling_utils import PreTrainedModel
from ...utils import (
from ....modeling_utils import PreTrainedModel
from ....utils import (
ModelOutput,
add_code_sample_docstrings,
add_start_docstrings,
......
......@@ -27,8 +27,8 @@ from typing import List, Optional, Tuple
import numpy as np
from ...tokenization_utils import PreTrainedTokenizer
from ...utils import (
from ....tokenization_utils import PreTrainedTokenizer
from ....utils import (
cached_file,
is_sacremoses_available,
is_torch_available,
......
......@@ -2676,6 +2676,48 @@ class TrajectoryTransformerPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["torch"])
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None
class AdaptiveEmbedding(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLForSequenceClassification(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLLMHeadModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLPreTrainedModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
def load_tf_weights_in_transfo_xl(*args, **kwargs):
requires_backends(load_tf_weights_in_transfo_xl, ["torch"])
VAN_PRETRAINED_MODEL_ARCHIVE_LIST = None
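The dummy objects added here keep the top-level `transformers` namespace importable when PyTorch is absent: each placeholder only raises an informative error when it is actually used. A rough sketch of the pattern, simplified from the library's `DummyObject`/`requires_backends` utilities (the placeholder class name below is made up):

```python
class DummyObject(type):
    """Metaclass for placeholders: any public attribute access reports the missing backend."""

    def __getattribute__(cls, key):
        # private/dunder lookups (e.g. `_backends`, `__name__`) behave normally
        if key.startswith("_"):
            return super().__getattribute__(key)
        raise ImportError(f"{cls.__name__} requires the {cls._backends} backend(s) to be installed.")


class TransfoXLModelPlaceholder(metaclass=DummyObject):
    _backends = ["torch"]


# Importing the placeholder costs nothing; using it fails loudly:
# TransfoXLModelPlaceholder.from_pretrained("...")  ->  ImportError mentioning "torch"
```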
......@@ -7739,48 +7781,6 @@ class TimmBackbone(metaclass=DummyObject):
requires_backends(self, ["torch"])
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None
class AdaptiveEmbedding(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLForSequenceClassification(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLLMHeadModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class TransfoXLPreTrainedModel(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
def load_tf_weights_in_transfo_xl(*args, **kwargs):
requires_backends(load_tf_weights_in_transfo_xl, ["torch"])
TROCR_PRETRAINED_MODEL_ARCHIVE_LIST = None
......
......@@ -1075,6 +1075,51 @@ class TFDeiTPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["tf"])
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None
class TFAdaptiveEmbedding(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLForSequenceClassification(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLLMHeadModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLMainLayer(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLPreTrainedModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None
......@@ -2613,51 +2658,6 @@ class TFTapasPreTrainedModel(metaclass=DummyObject):
requires_backends(self, ["tf"])
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = None
class TFAdaptiveEmbedding(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLForSequenceClassification(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLLMHeadModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLMainLayer(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFTransfoXLPreTrainedModel(metaclass=DummyObject):
_backends = ["tf"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["tf"])
class TFVisionEncoderDecoderModel(metaclass=DummyObject):
_backends = ["tf"]
......
......@@ -104,11 +104,7 @@ class GenerationTesterMixin:
if isinstance(config.eos_token_id, int):
config.eos_token_id = [config.eos_token_id]
config.pad_token_id = config.eos_token_id[0]
# TransfoXL has no attention mask
if "transfoxl" in config.__class__.__name__.lower():
attention_mask = None
else:
attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :sequence_length]
attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :sequence_length]
return config, input_ids, attention_mask, max_length
......
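With TransfoXL deprecated and dropped from these tests, the generation helper no longer needs the no-attention-mask special case and can always build a full mask. A toy illustration (made-up tensors, not test code) of the construction used above:

```python
import torch

input_ids = torch.tensor([[5, 6, 7, 8], [9, 10, 11, 12]])  # (batch=2, seq=4)
batch_size, sequence_length = 2, 3

# full mask of ones, then sliced down to the batch/length under test
attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :sequence_length]
assert attention_mask.shape == (2, 3)
```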