Unverified Commit 04a17f85 authored by Sylvain Gugger, committed by GitHub

Doc fixes in preparation for the docstyle PR (#8061)

* Fixes in preparation for doc styling

* More fixes

* Better syntax

* Fixes

* Style

* More fixes

* More fixes
parent 8bbb74f2
@@ -112,7 +112,7 @@ Example usage
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Here is an example using the processors as well as the conversion method using data files:
-Example::
+.. code-block::
     # Loading a V2 processor
     processor = SquadV2Processor()
@@ -133,7 +133,7 @@ Example::
 Using `tensorflow_datasets` is as easy as using a data file:
-Example::
+.. code-block::
     # tensorflow_datasets only handle Squad V1.
     tfds_examples = tfds.load("squad")
......
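For readers who want to try the snippet above end to end, here is a minimal sketch (the tokenizer name, data directory, and parameter values are placeholders, not part of this commit):

```python
from transformers import AutoTokenizer, SquadV2Processor, squad_convert_examples_to_features

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Loading a V2 processor and reading examples from a SQuAD-style JSON file
processor = SquadV2Processor()
examples = processor.get_dev_examples("path/to/squad_v2_data")  # hypothetical directory containing dev-v2.0.json

# Alternatively, with tensorflow_datasets (Squad V1 only):
#   tfds_examples = tfds.load("squad")
#   examples = SquadV1Processor().get_examples_from_dataset(tfds_examples, evaluate=True)

# Convert the examples into model-ready features
features = squad_convert_examples_to_features(
    examples=examples,
    tokenizer=tokenizer,
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
)
```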
@@ -47,7 +47,7 @@ Usage:
 - Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
-:: code-block
+.. code-block::
     # instantiate sentence fusion model
     sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
......
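As a hedged illustration of how the sentence-fusion snippet above is typically continued (the tokenizer choice and input text are assumptions on my part, not part of the diff):

```python
from transformers import AutoTokenizer, EncoderDecoderModel

# instantiate sentence fusion model and a matching tokenizer
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")

# two sentences to be fused into one
input_ids = tokenizer(
    "This is the first sentence. This is the second sentence.", return_tensors="pt"
).input_ids

# generate the fused sentence and decode it back to text
outputs = sentence_fuser.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```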
@@ -28,7 +28,9 @@ Implementation Notes
 Usage
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Model Usage:
+Here is an example of model usage:
+.. code-block::
     >>> from transformers import BlenderbotSmallTokenizer, BlenderbotForConditionalGeneration
     >>> mname = 'facebook/blenderbot-90M'
@@ -40,7 +42,10 @@ Model Usage:
     >>> print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in reply_ids])
-See Config Values:
+Here is how you can check out config values:
+.. code-block::
     >>> from transformers import BlenderbotConfig
     >>> config_90 = BlenderbotConfig.from_pretrained("facebook/blenderbot-90M")
......
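For context, a sketch of the full doctest this hunk excerpts, with the collapsed middle lines filled in (the utterance text is illustrative):

```python
>>> from transformers import BlenderbotSmallTokenizer, BlenderbotForConditionalGeneration
>>> mname = 'facebook/blenderbot-90M'
>>> model = BlenderbotForConditionalGeneration.from_pretrained(mname)
>>> tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname)
>>> UTTERANCE = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([UTTERANCE], return_tensors='pt')
>>> reply_ids = model.generate(**inputs)
>>> print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in reply_ids])
```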
@@ -45,6 +45,8 @@ Note:
 If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install
 ``ftfy`` and ``SpaCy``::
+.. code-block:: bash
     pip install spacy ftfy==4.4.3
     python -m spacy download en
......
-# This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
-# Copyright by the AllenNLP authors.
 """
 Utilities for working with the local dataset cache.
+This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
+Copyright by the AllenNLP authors.
 """
 import copy
......
@@ -8,7 +8,8 @@ from ..utils import logging
 def convert_command_factory(args: Namespace):
     """
     Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.
-    :return: ServeCommand
+    Returns: ServeCommand
     """
     return ConvertCommand(
         args.model_type, args.tf_checkpoint, args.pytorch_dump_output, args.config, args.finetuning_task_name
@@ -26,8 +27,9 @@ class ConvertCommand(BaseTransformersCLICommand):
     def register_subcommand(parser: ArgumentParser):
         """
         Register this command to argparse so it's available for the transformer-cli
-        :param parser: Root parser to register command-specific arguments
-        :return:
+        Args:
+            parser: Root parser to register command-specific arguments
         """
         train_parser = parser.add_parser(
             "convert",
......
@@ -31,7 +31,8 @@ logger = logging.get_logger("transformers-cli/serving")
 def serve_command_factory(args: Namespace):
     """
     Factory function used to instantiate serving server from provided command line arguments.
-    :return: ServeCommand
+    Returns: ServeCommand
     """
     nlp = pipeline(
         task=args.task,
@@ -81,8 +82,9 @@ class ServeCommand(BaseTransformersCLICommand):
     def register_subcommand(parser: ArgumentParser):
         """
         Register this command to argparse so it's available for the transformer-cli
-        :param parser: Root parser to register command-specific arguments
-        :return:
+        Args:
+            parser: Root parser to register command-specific arguments
         """
         serve_parser = parser.add_parser(
             "serve", help="CLI tool to run inference requests through REST and GraphQL endpoints."
......
@@ -19,7 +19,8 @@ USE_AMP = False
 def train_command_factory(args: Namespace):
     """
     Factory function used to instantiate training command from provided command line arguments.
-    :return: TrainCommand
+    Returns: TrainCommand
     """
     return TrainCommand(args)
@@ -29,8 +30,9 @@ class TrainCommand(BaseTransformersCLICommand):
     def register_subcommand(parser: ArgumentParser):
         """
         Register this command to argparse so it's available for the transformer-cli
-        :param parser: Root parser to register command-specific arguments
-        :return:
+        Args:
+            parser: Root parser to register command-specific arguments
         """
         train_parser = parser.add_parser("train", help="CLI tool to train a model on a task.")
......
@@ -70,7 +70,7 @@ class BaseUserCommand:
 class LoginCommand(BaseUserCommand):
     def run(self):
-        print(
+        print(  # docstyle-ignore
             """
             _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|
             _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
@@ -127,8 +127,9 @@ class ListObjsCommand(BaseUserCommand):
     def tabulate(self, rows: List[List[Union[str, int]]], headers: List[str]) -> str:
         """
         Inspired by:
-        stackoverflow.com/a/8356620/593036
-        stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
+        - stackoverflow.com/a/8356620/593036
+        - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
         """
         col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
         row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
......
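As an aside, a self-contained sketch of the table-formatting idea behind `tabulate`; the `col_widths` and `row_format` lines come from the hunk, the rest of the body is an approximation since the remainder is collapsed:

```python
from typing import List, Union

def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
    # width of each column = widest cell in that column, headers included
    col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
    row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
    lines = [row_format.format(*headers)]
    lines.append(row_format.format(*["-" * w for w in col_widths]))
    lines.extend(row_format.format(*row) for row in rows)
    return "\n".join(lines)

print(tabulate([["bert-base", 420], ["gpt2", 510]], headers=["model", "size_MB"]))
```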
@@ -28,19 +28,19 @@ from transformers import BertModel
 def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name: str):
     """
-    :param model:BertModel Pytorch model instance to be converted
-    :param ckpt_dir: Tensorflow model directory
-    :param model_name: model name
-    :return:
+    Args
+        model: BertModel Pytorch model instance to be converted
+        ckpt_dir: Tensorflow model directory
+        model_name: model name
     Currently supported HF models:
-    Y BertModel
-    N BertForMaskedLM
-    N BertForPreTraining
-    N BertForMultipleChoice
-    N BertForNextSentencePrediction
-    N BertForSequenceClassification
-    N BertForQuestionAnswering
+    - Y BertModel
+    - N BertForMaskedLM
+    - N BertForPreTraining
+    - N BertForMultipleChoice
+    - N BertForNextSentencePrediction
+    - N BertForSequenceClassification
+    - N BertForQuestionAnswering
     """
     tensors_to_transpose = ("dense.weight", "attention.self.query", "attention.self.key", "attention.self.value")
......
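A rough sketch of how this conversion helper might be driven (the import path and output directory are assumptions; only plain `BertModel` is supported, per the list above):

```python
from transformers import BertModel
# assumed module path of the conversion script inside the package
from transformers.convert_bert_pytorch_checkpoint_to_original_tf import convert_pytorch_checkpoint_to_tf

# load a plain BertModel checkpoint and export it as a TF 1.x checkpoint
model = BertModel.from_pretrained("bert-base-uncased")
convert_pytorch_checkpoint_to_tf(model, ckpt_dir="tf_checkpoint/", model_name="bert-base-uncased")
```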
@@ -28,11 +28,13 @@ LANG_CODE_PATH = "lang_code_data/language-codes-3b2.csv"
 class TatoebaConverter:
     """Convert Tatoeba-Challenge models to huggingface format.
     Steps:
-    (1) convert numpy state dict to hf format (same code as OPUS-MT-Train conversion).
-    (2) rename opus model to huggingface format. This means replace each alpha3 code with an alpha2 code if a unique one existes.
-        e.g. aav-eng -> aav-en, heb-eng -> he-en
-    (3) write a model card containing the original Tatoeba-Challenge/README.md and extra info about alpha3 group members.
+    1. convert numpy state dict to hf format (same code as OPUS-MT-Train conversion).
+    2. rename opus model to huggingface format. This means replace each alpha3 code with an alpha2 code if a unique one existes.
+       e.g. aav-eng -> aav-en, heb-eng -> he-en
+    3. write a model card containing the original Tatoeba-Challenge/README.md and extra info about alpha3 group members.
     """
     def __init__(self, save_dir="marian_converted"):
......
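To make step 2 concrete, a toy sketch of the alpha3-to-alpha2 renaming; the mapping dict here is a hypothetical stand-in for the real table loaded from LANG_CODE_PATH:

```python
# Hypothetical, simplified mapping; the real converter reads language-codes-3b2.csv
ALPHA3_TO_ALPHA2 = {"eng": "en", "heb": "he", "fra": "fr"}

def shorten_pair(pair: str) -> str:
    """Replace each alpha3 code with its alpha2 code when a unique one exists, e.g. heb-eng -> he-en."""
    src, tgt = pair.split("-")
    return f"{ALPHA3_TO_ALPHA2.get(src, src)}-{ALPHA3_TO_ALPHA2.get(tgt, tgt)}"

print(shorten_pair("heb-eng"))  # he-en
print(shorten_pair("aav-eng"))  # aav-en (aav is a group code with no alpha2 equivalent)
```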
@@ -19,14 +19,12 @@ DataCollator = NewType("DataCollator", Callable[[List[InputDataClass]], Dict[str
 def default_data_collator(features: List[InputDataClass]) -> Dict[str, torch.Tensor]:
     """
-    Very simple data collator that:
-    - simply collates batches of dict-like objects
-    - Performs special handling for potential keys named:
+    Very simple data collator that simply collates batches of dict-like objects and performs special handling for potential keys named:
         - ``label``: handles a single value (int or float) per object
         - ``label_ids``: handles a list of values per object
-    - does not do any additional preprocessing
-    i.e., Property names of the input object will be used as corresponding inputs to the model.
+    Does not do any additional preprocessing: property names of the input object will be used as corresponding inputs to the model.
     See glue and ner for example of how it's useful.
     """
@@ -425,6 +423,7 @@ class DataCollatorForPermutationLanguageModeling:
     def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         The masked tokens to be predicted for a particular sequence are determined by the following algorithm:
         0. Start from the beginning of the sequence by setting ``cur_len = 0`` (number of tokens processed so far).
         1. Sample a ``span_length`` from the interval ``[1, max_span_length]`` (length of span of tokens to be masked)
         2. Reserve a context of length ``context_length = span_length / plm_probability`` to surround span to be masked
......
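The remaining steps of this algorithm are collapsed in the hunk; below is a hedged sketch of the span-sampling loop that steps 0-2 describe, with the continuation assumed from the stated pattern rather than quoted from the file:

```python
import torch

def sample_plm_mask(seq_len: int, max_span_length: int = 5, plm_probability: float = 1 / 6) -> torch.Tensor:
    """Return a boolean mask choosing spans to predict, following the span/context scheme above."""
    masked = torch.zeros(seq_len, dtype=torch.bool)
    cur_len = 0  # step 0: number of tokens processed so far
    while cur_len < seq_len:
        # step 1: sample the length of the span to mask
        span_length = torch.randint(1, max_span_length + 1, (1,)).item()
        # step 2: reserve a surrounding context of length span_length / plm_probability
        context_length = int(span_length / plm_probability)
        # place the span at a random offset inside its context, then jump to the next context
        start = cur_len + torch.randint(0, max(context_length - span_length + 1, 1), (1,)).item()
        masked[start : start + span_length] = True
        cur_len += context_length
    return masked

print(sample_plm_mask(32))
```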
@@ -289,6 +289,7 @@ def torch_only_method(fn):
     return wrapper
+# docstyle-ignore
 DATASETS_IMPORT_ERROR = """
 {0} requires the 🤗 Datasets library but it was not found in your enviromnent. You can install it with:
 ```
@@ -306,6 +307,7 @@ that python file if that's the case.
 """
+# docstyle-ignore
 TOKENIZERS_IMPORT_ERROR = """
 {0} requires the 🤗 Tokenizers library but it was not found in your enviromnent. You can install it with:
 ```
@@ -318,6 +320,7 @@ In a notebook or a colab, you can install it by executing a cell with
 """
+# docstyle-ignore
 SENTENCEPIECE_IMPORT_ERROR = """
 {0} requires the SentencePiece library but it was not found in your enviromnent. Checkout the instructions on the
 installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
@@ -325,6 +328,7 @@ that match your enviromnent.
 """
+# docstyle-ignore
 FAISS_IMPORT_ERROR = """
 {0} requires the faiss library but it was not found in your enviromnent. Checkout the instructions on the
 installation page of its repo: https://github.com/facebookresearch/faiss/blob/master/INSTALL.md and follow the ones
@@ -332,12 +336,14 @@ that match your enviromnent.
 """
+# docstyle-ignore
 PYTORCH_IMPORT_ERROR = """
 {0} requires the PyTorch library but it was not found in your enviromnent. Checkout the instructions on the
 installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your enviromnent.
 """
+# docstyle-ignore
 SKLEARN_IMPORT_ERROR = """
 {0} requires the scikit-learn library but it was not found in your enviromnent. You can install it with:
 ```
@@ -350,12 +356,14 @@ In a notebook or a colab, you can install it by executing a cell with
 """
+# docstyle-ignore
 TENSORFLOW_IMPORT_ERROR = """
 {0} requires the TensorFlow library but it was not found in your enviromnent. Checkout the instructions on the
 installation page: https://www.tensorflow.org/install and follow the ones that match your enviromnent.
 """
+# docstyle-ignore
 FLAX_IMPORT_ERROR = """
 {0} requires the FLAX library but it was not found in your enviromnent. Checkout the instructions on the
 installation page: https://github.com/google/flax and follow the ones that match your enviromnent.
......
@@ -917,7 +917,7 @@ def _create_next_token_logits_penalties(input_ids, logits, repetition_penalty):
 def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_size, cur_len):
-    # Copied from fairseq for no_repeat_ngram in beam_search"""
+    # Copied from fairseq for no_repeat_ngram in beam_search
     if cur_len + 1 < no_repeat_ngram_size:
         # return no banned tokens if we haven't generated no_repeat_ngram_size tokens yet
         return [[] for _ in range(num_hypos)]
......
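The rest of this function is collapsed; here is a standalone sketch of the no-repeat-ngram idea it implements, simplified to a single hypothesis and plain Python lists:

```python
def banned_ngram_tokens(prev_tokens, no_repeat_ngram_size):
    """Tokens that would complete an n-gram already present in prev_tokens."""
    n = no_repeat_ngram_size
    if len(prev_tokens) + 1 < n:
        return []  # not enough tokens generated yet to repeat an n-gram
    # map every (n-1)-gram prefix to the tokens that followed it
    generated = {}
    for i in range(len(prev_tokens) - n + 1):
        prefix = tuple(prev_tokens[i : i + n - 1])
        generated.setdefault(prefix, []).append(prev_tokens[i + n - 1])
    # the prefix formed by the last n-1 tokens decides what is now banned
    current_prefix = tuple(prev_tokens[len(prev_tokens) - n + 1 :])
    return generated.get(current_prefix, [])

print(banned_ngram_tokens([5, 7, 9, 5, 7], no_repeat_ngram_size=3))  # [9]
```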
@@ -857,16 +857,16 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         **kwargs,
     ):
         r"""
-        mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input)
+        mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
-        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`)
+        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for language modeling.
             Note that the labels **are shifted** inside the model, i.e. you can set ``labels = input_ids``
             Indices are selected in ``[-1, 0, ..., config.vocab_size]``
             All labels set to ``-100`` are ignored (masked), the loss is only
             computed for labels in ``[0, ..., config.vocab_size]``
-        mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`)
+        mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`):
             Labels for computing the multiple choice classification loss.
             Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension
             of the input tensors. (see `input_ids` above)
......
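For reference, the kind of call these `mc_token_ids`/`mc_labels` arguments enable, in the spirit of the GPT-2 docs example of the period (the example sentences are illustrative):

```python
import torch
from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2DoubleHeadsModel.from_pretrained("gpt2")

# add a [CLS] token whose hidden state feeds the multiple-choice head
tokenizer.add_special_tokens({"cls_token": "[CLS]"})
model.resize_token_embeddings(len(tokenizer))

choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
encoded = [tokenizer.encode(c) for c in choices]
input_ids = torch.tensor(encoded).unsqueeze(0)  # shape (batch_size=1, num_choices=2, seq_len)

# index of the [CLS] token in each choice (here: the last token)
mc_token_ids = torch.tensor([[len(ids) - 1 for ids in encoded]])

outputs = model(input_ids, mc_token_ids=mc_token_ids, return_dict=True)
print(outputs.mc_logits.shape)  # (1, 2)
```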
@@ -105,8 +105,10 @@ def create_position_ids_from_input_ids(input_ids, padding_idx):
     padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
     `utils.make_positions`.
-    :param torch.Tensor x:
-    :return torch.Tensor:
+    Args:
+        x: torch.Tensor x:
+    Returns: torch.Tensor
     """
     # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
     mask = input_ids.ne(padding_idx).int()
@@ -176,8 +178,10 @@ class LongformerEmbeddings(nn.Module):
     """We are provided embeddings directly. We cannot infer which are padded so just generate
     sequential position ids.
-    :param torch.Tensor inputs_embeds:
-    :return torch.Tensor:
+    Args:
+        inputs_embeds: torch.Tensor inputs_embeds:
+    Returns: torch.Tensor
     """
     input_shape = inputs_embeds.size()[:-1]
     sequence_length = input_shape[1]
......
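A brief standalone sketch of what `create_position_ids_from_input_ids` computes; the `mask` line is quoted from the hunk, the cumulative-sum continuation is assumed from the fairseq pattern the docstring cites:

```python
import torch

def create_position_ids_from_input_ids(input_ids, padding_idx):
    # 1 for real tokens, 0 for padding
    mask = input_ids.ne(padding_idx).int()
    # running count of real tokens, zeroed again at padding positions
    incremental_indices = torch.cumsum(mask, dim=1) * mask
    # shift so position numbers start at padding_idx + 1
    return incremental_indices.long() + padding_idx

input_ids = torch.tensor([[0, 31414, 232, 2, 1, 1]])  # 1 is the RoBERTa/Longformer pad id
print(create_position_ids_from_input_ids(input_ids, padding_idx=1))
# tensor([[2, 3, 4, 5, 1, 1]])
```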
@@ -647,16 +647,16 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         **kwargs
     ):
         r"""
-        mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input)
+        mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1]``.
-        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`)
+        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
             Labels for language modeling.
             Note that the labels **are shifted** inside the model, i.e. you can set ``labels = input_ids``
             Indices are selected in ``[-1, 0, ..., config.vocab_size]``
             All labels set to ``-100`` are ignored (masked), the loss is only
             computed for labels in ``[0, ..., config.vocab_size]``
-        mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`)
+        mc_labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size)`, `optional`):
             Labels for computing the multiple choice classification loss.
             Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension
             of the input tensors. (see `input_ids` above)
......
@@ -127,8 +127,10 @@ class RobertaEmbeddings(nn.Module):
     """We are provided embeddings directly. We cannot infer which are padded so just generate
     sequential position ids.
-    :param torch.Tensor inputs_embeds:
-    :return torch.Tensor:
+    Args:
+        inputs_embeds: torch.Tensor
+    Returns: torch.Tensor
     """
     input_shape = inputs_embeds.size()[:-1]
     sequence_length = input_shape[1]
@@ -1326,8 +1328,10 @@ def create_position_ids_from_input_ids(input_ids, padding_idx):
     padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
     `utils.make_positions`.
-    :param torch.Tensor x:
-    :return torch.Tensor:
+    Args:
+        x: torch.Tensor x:
+    Returns: torch.Tensor
     """
     # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
     mask = input_ids.ne(padding_idx).int()
......
@@ -704,7 +704,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
         training=False,
     ):
         r"""
-        mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input)
+        mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
             Index of the classification token in each input sequence.
             Selected in the range ``[0, input_ids.size(-1) - 1[``.
......
@@ -166,8 +166,11 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
     """Replace non-padding symbols with their position numbers. Position numbers begin at
     padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
     `utils.make_positions`.
-    :param tf.Tensor x:
-    :return tf.Tensor:
+    Args:
+        x: tf.Tensor
+    Returns: tf.Tensor
     """
     mask = tf.cast(tf.math.not_equal(x, self.padding_idx), dtype=tf.int32)
     incremental_indicies = tf.math.cumsum(mask, axis=1) * mask
@@ -177,8 +180,11 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
     def create_position_ids_from_inputs_embeds(self, inputs_embeds):
         """We are provided embeddings directly. We cannot infer which are padded so just generate
         sequential position ids.
-        :param tf.Tensor inputs_embeds:
-        :return tf.Tensor:
+        Args:
+            inputs_embeds: tf.Tensor
+        Returns: tf.Tensor
         """
         seq_length = shape_list(inputs_embeds)[1]
         position_ids = tf.range(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=tf.int32)[tf.newaxis, :]
......