Unverified Commit 088c1880 authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Big file_utils cleanup (#16396)

* Big file_utils cleanup

* This one still needs to be treated separately
parent 2b23e080
......@@ -69,7 +69,7 @@ PRETRAINED_VOCAB_FILES_MAP = {
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"facebook/wav2vec2-base-960h": sys.maxsize}
WAV2VEC2_KWARGS_DOCSTRING = r"""
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
Activates and controls padding. Accepts the following values:
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
......@@ -87,7 +87,7 @@ WAV2VEC2_KWARGS_DOCSTRING = r"""
pad_to_multiple_of (`int`, *optional*):
If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
......
......@@ -1176,7 +1176,7 @@ WAVLM_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -107,7 +107,7 @@ XGLM_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -110,7 +110,7 @@ XGLM_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -669,7 +669,7 @@ class XGLMModel(XGLMPreTrainedModel):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
......
......@@ -667,8 +667,8 @@ XLM_INPUTS_DOCSTRING = r"""
more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
used instead.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
in eager mode, in graph mode the value will always be set to True.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
eager mode, in graph mode the value will always be set to True.
training (`bool`, *optional*, defaults to `False`):
Whether or not to use the model in training mode (some modules like dropout modules have different
behaviors between training and evaluation).
......
......@@ -390,7 +390,7 @@ XLM_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -661,7 +661,7 @@ XLM_ROBERTA_XL_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -1115,7 +1115,7 @@ XLNET_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -927,7 +927,7 @@ XLNET_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -737,7 +737,7 @@ YOSO_INPUTS_DOCSTRING = r"""
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......
......@@ -286,7 +286,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
inference to be done sequentially to extract relations within sequences, given their conversational
nature.
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
Activates and controls padding. Accepts the following values:
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
......
......@@ -120,7 +120,7 @@ class ProcessorMixin(PushToHubMixin):
</Tip>
kwargs:
Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
"""
if push_to_hub:
commit_message = kwargs.pop("commit_message", None)
......
......@@ -26,7 +26,7 @@ from transformers.utils import cached_property, is_sagemaker_dp_enabled, logging
logger = logging.get_logger(__name__)
# TODO: should be moved to `file_utils` after refactoring of SageMakerTrainer
# TODO: should be moved to `utils` after refactoring of SageMakerTrainer
def is_sagemaker_model_parallel_available():
......
......@@ -646,9 +646,9 @@ class BatchEncoding(UserDict):
Convert the inner content to tensors.
Args:
tensor_type (`str` or [`~file_utils.TensorType`], *optional*):
The type of tensors to use. If `str`, should be one of the values of the enum
[`~file_utils.TensorType`]. If `None`, no modification is done.
tensor_type (`str` or [`~utils.TensorType`], *optional*):
The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
`None`, no modification is done.
prepend_batch_axis (`int`, *optional*, defaults to `False`):
Whether or not to add the batch dimension during the conversion.
"""
......@@ -1253,7 +1253,7 @@ class SpecialTokensMixin:
ENCODE_KWARGS_DOCSTRING = r"""
add_special_tokens (`bool`, *optional*, defaults to `True`):
Whether or not to encode the sequences with the special tokens relative to their model.
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
Activates and controls padding. Accepts the following values:
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
......@@ -1295,7 +1295,7 @@ ENCODE_KWARGS_DOCSTRING = r"""
pad_to_multiple_of (`int`, *optional*):
If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
......@@ -2731,7 +2731,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
Instead of `List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors), see
the note above for the return type.
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding
index) among:
......@@ -2753,7 +2753,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
to the specific tokenizer's default, defined by the `return_outputs` attribute.
[What are attention masks?](../glossary#attention-mask)
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
......@@ -3453,7 +3453,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
max_target_length (`int`, *optional*):
Controls the maximum length of decoder inputs (target language texts or summaries) If left unset or set
to `None`, this will use the max_length value.
padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
Activates and controls padding. Accepts the following values:
- `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
......@@ -3462,7 +3462,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
acceptable input length for the model if that argument is not provided.
- `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
lengths).
return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
......
......@@ -334,7 +334,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
section.
Args:
padding_strategy ([`~file_utils.PaddingStrategy`]):
padding_strategy ([`~utils.PaddingStrategy`]):
The kind of padding that will be applied to the input
truncation_strategy ([`~tokenization_utils_base.TruncationStrategy`]):
The kind of truncation that will be applied to the input
......
......@@ -150,8 +150,8 @@ class ModelOutput(OrderedDict):
<Tip warning={true}>
You can't unpack a `ModelOutput` directly. Use the [`~file_utils.ModelOutput.to_tuple`] method to convert it to a
tuple before.
You can't unpack a `ModelOutput` directly. Use the [`~utils.ModelOutput.to_tuple`] method to convert it to a tuple
before.
</Tip>
"""
......
......@@ -535,7 +535,7 @@ to make your debugging environment as efficient as possible.
due to multiple dropout layers in the model. Make sure that the
forward pass in your debugging environment is **deterministic** so
that the dropout layers are not used. Or use
`transformers.file_utils.set_seed` if the old and new
`transformers.utils.set_seed` if the old and new
implementations are in the same framework.
#### More details on how to create a debugging environment for [camelcase name of model]
......
......@@ -119,7 +119,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
- 0 indicates the head is **masked**.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -1244,7 +1244,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -1275,7 +1275,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
{{cookiecutter.uppercase_modelname}}_DECODE_INPUTS_DOCSTRING = r"""
......@@ -1322,7 +1322,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
......
......@@ -925,7 +925,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
used instead.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
argument can be used in eager mode, in graph mode the value will always be set to True.
training (`bool`, *optional*, defaults to `False`):
Whether or not to use the model in training mode (some modules like dropout modules have different
......@@ -2338,7 +2338,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
used instead.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
argument can be used in eager mode, in graph mode the value will always be set to True.
training (`bool`, *optional*, defaults to `False`):
Whether or not to use the model in training mode (some modules like dropout modules have different
......@@ -2429,7 +2429,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
for more detail. This argument can be used only in eager mode, in graph mode the value in the config
will be used instead.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
argument can be used in eager mode, in graph mode the value will always be set to True.
training (`bool`, *optional*, defaults to `False`):
Whether or not to use the model in training mode (some modules like dropout modules have different
......@@ -2626,7 +2626,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
for more detail. This argument can be used only in eager mode, in graph mode the value in the config
will be used instead.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
argument can be used in eager mode, in graph mode the value will always be set to True.
training (`bool`, *optional*, defaults to `False`):
Whether or not to use the model in training mode (some modules like dropout modules have different
......
......@@ -746,7 +746,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -2157,7 +2157,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -2186,7 +2186,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
......@@ -2272,7 +2272,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
......@@ -2494,7 +2494,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
......@@ -3270,7 +3270,7 @@ class {{cookiecutter.camelcase_modelname}}ForCausalLM({{cookiecutter.camelcase_m
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
Returns:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment