chenpangpang / transformers

Commit c42596bc (unverified)
Authored Oct 27, 2020 by Sylvain Gugger; committed by GitHub on Oct 27, 2020
Parent: 1496931b

Doc styling fixes (#8074)

* Fix a few docstrings
* More fixes
* Styling
Showing 6 changed files with 50 additions and 43 deletions:
- src/transformers/tokenization_camembert_fast.py (+15 −14)
- src/transformers/tokenization_dpr.py (+6 −6)
- src/transformers/tokenization_dpr_fast.py (+6 −6)
- src/transformers/trainer.py (+10 −7)
- src/transformers/training_args.py (+11 −9)
- utils/style_doc.py (+2 −1)
src/transformers/tokenization_camembert_fast.py

@@ -66,30 +66,31 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):
     This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
     methods. Users should refer to this superclass for more information regarding those methods.

-    vocab_file (:obj:`str`): `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm`
-    extension) that contains the vocabulary necessary to instantiate a tokenizer. bos_token (:obj:`str`, `optional`,
-    defaults to :obj:`"<s>"`): The beginning of sequence token that was used during pretraining. Can be used a sequence
-    classifier token.
+    Args:
+        vocab_file (:obj:`str`):
+            `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
+            contains the vocabulary necessary to instantiate a tokenizer.
+        bos_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
+            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.

             .. note::

                 When building a sequence using special tokens, this is not the token that is used for the beginning of
                 sequence. The token used is the :obj:`cls_token`.
         eos_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
             The end of sequence token.

             .. note::

                 When building a sequence using special tokens, this is not the token that is used for the end of
                 sequence. The token used is the :obj:`sep_token`.
         sep_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
             The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
             sequence classification or for a text and a question for question answering. It is also used as the last
             token of a sequence built with special tokens.
         cls_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
-            The classifier token which is used when doing sequence classification (classification of the whole
-            sequence instead of per-token classification). It is the first token of the sequence when built with
-            special tokens.
+            The classifier token which is used when doing sequence classification (classification of the whole sequence
+            instead of per-token classification). It is the first token of the sequence when built with special tokens.
         unk_token (:obj:`str`, `optional`, defaults to :obj:`"<unk>"`):
             The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
             token instead.
...
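As a point of reference for the docstring above, a minimal usage sketch of the class being documented (the "camembert-base" checkpoint name is the standard one; the output comments reflect the cls_token/bos_token note):

from transformers import CamembertTokenizerFast

# Load the fast tokenizer; the SentencePiece vocab file ships with the checkpoint.
tokenizer = CamembertTokenizerFast.from_pretrained("camembert-base")

# Per the note in the docstring, a sequence built with special tokens starts
# with cls_token ("<s>"), not bos_token, and ends with sep_token ("</s>").
ids = tokenizer("J'aime le camembert !")["input_ids"]
tokens = tokenizer.convert_ids_to_tokens(ids)
print(tokens[0], tokens[-1])  # <s> </s>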
src/transformers/tokenization_dpr.py

@@ -131,8 +131,8 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
 CUSTOM_DPR_READER_DOCSTRING = r"""
     Return a dictionary with the token ids of the input strings and other information to give to
     :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
     sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
     :obj:`(n_passages, sequence_length)` with the format:

     ::
...

@@ -189,7 +189,7 @@ CUSTOM_DPR_READER_DOCSTRING = r"""
             `What are attention masks? <../glossary.html#attention-mask>`__

-        Return:
+        Returns:
             :obj:`Dict[str, List[List[int]]]`: A dictionary with the following keys:

             - ``input_ids``: List of token ids to be fed to a model.
...
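A sketch of the call this docstring documents, using the standard "facebook/dpr-reader-single-nq-base" checkpoint (the question and passage strings are illustrative):

from transformers import DPRReaderTokenizer

tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")

# One question paired with two (title, text) passages. The returned dictionary
# contains input_ids shaped (n_passages, sequence_length), each row formatted as
# [CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>.
encoded = tokenizer(
    questions=["What is love?"],
    titles=["Haddaway", "Love"],
    texts=[
        "'What Is Love' is a song recorded by the artist Haddaway.",
        "Love is a strong feeling of affection.",
    ],
    padding=True,
    return_tensors="pt",
)
print(encoded["input_ids"].shape)  # torch.Size([2, sequence_length])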
src/transformers/tokenization_dpr_fast.py

@@ -134,8 +134,8 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
 CUSTOM_DPR_READER_DOCSTRING = r"""
     Return a dictionary with the token ids of the input strings and other information to give to
     :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
     sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
     :obj:`(n_passages, sequence_length)` with the format:

        [CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>
...
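The fast tokenizer shares this docstring; a sketch of the full round trip through decode_best_spans (same assumed checkpoint as above; return_dict=True keeps the reader's named logits accessible on the output):

import torch
from transformers import DPRReader, DPRReaderTokenizerFast

tokenizer = DPRReaderTokenizerFast.from_pretrained("facebook/dpr-reader-single-nq-base")
model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")

encoded = tokenizer(
    questions=["What is love?"],
    titles=["Haddaway"],
    texts=["'What Is Love' is a song recorded by the artist Haddaway."],
    return_tensors="pt",
)
with torch.no_grad():
    outputs = model(**encoded, return_dict=True)

# decode_best_spans combines the encoded inputs with the reader's start/end/
# relevance logits and returns scored span predictions.
spans = tokenizer.decode_best_spans(encoded, outputs)
print(spans[0].text)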
src/transformers/trainer.py

@@ -194,18 +194,21 @@ class Trainer:
             The function may have zero argument, or a single one containing the optuna/Ray Tune trial object, to be
             able to choose different architectures according to hyper parameters (such as layer count, sizes of inner
-            layers, dropout probabilities etc). compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
+            layers, dropout probabilities etc).
+        compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
             The function that will be used to compute metrics at evaluation. Must take a
-            :class:`~transformers.EvalPrediction` and return a dictionary string to metric values. callbacks (List of
-            :obj:`~transformers.TrainerCallback`, `optional`): A list of callbacks to customize the training loop. Will
-            add those to the list of default callbacks detailed in :doc:`here <callback>`.
+            :class:`~transformers.EvalPrediction` and return a dictionary string to metric values.
+        callbacks (List of :obj:`~transformers.TrainerCallback`, `optional`):
+            A list of callbacks to customize the training loop. Will add those to the list of default callbacks
+            detailed in :doc:`here <callback>`.

             If you want to remove one of the default callbacks used, use the :meth:`Trainer.remove_callback` method.
         optimizers (:obj:`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR`, `optional`): A tuple
             containing the optimizer and the scheduler to use. Will default to an instance of
             :class:`~transformers.AdamW` on your model and a scheduler given by
-            :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`. kwargs: Deprecated keyword
-            arguments.
+            :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
+        kwargs:
+            Deprecated keyword arguments.
     """

     def __init__(
...
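A sketch of how the compute_metrics argument documented above plugs into the Trainer (the model and datasets are assumed to be defined elsewhere; the accuracy metric is illustrative):

import numpy as np
from transformers import EvalPrediction, Trainer, TrainingArguments

# compute_metrics receives an EvalPrediction (predictions + label_ids) and
# returns a dictionary mapping metric names to values.
def compute_metrics(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=-1)
    return {"accuracy": float((preds == p.label_ids).mean())}

trainer = Trainer(
    model=model,                      # assumed: a PreTrainedModel defined elsewhere
    args=TrainingArguments(output_dir="output"),
    train_dataset=train_dataset,      # assumed: defined elsewhere
    eval_dataset=eval_dataset,        # assumed: defined elsewhere
    compute_metrics=compute_metrics,
)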
src/transformers/training_args.py

@@ -144,29 +144,31 @@ class TrainingArguments:
             If using `nlp.Dataset` datasets, whether or not to automatically remove the columns unused by the model
             forward method.

-            (Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.) label_names
-            (:obj:`List[str]`, `optional`): The list of keys in your dictionary of inputs that correspond to the
-            labels.
+            (Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.)
+        label_names (:obj:`List[str]`, `optional`):
+            The list of keys in your dictionary of inputs that correspond to the labels.

             Will eventually default to :obj:`["labels"]` except if the model used is one of the
             :obj:`XxxForQuestionAnswering` in which case it will default to :obj:`["start_positions",
-            "end_positions"]`. load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or
-            not to load the best model found during training at the end of training.
+            "end_positions"]`.
+        load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to load the best model found during training at the end of training.

             .. note::

                 When set to :obj:`True`, the parameters :obj:`save_steps` will be ignored and the model will be saved
                 after each evaluation.
-        metric_for_best_model (:obj:`str`, `optional`)
+        metric_for_best_model (:obj:`str`, `optional`):
             Use in conjunction with :obj:`load_best_model_at_end` to specify the metric to use to compare two different
             models. Must be the name of a metric returned by the evaluation with or without the prefix :obj:`"eval_"`.
             Will default to :obj:`"loss"` if unspecified and :obj:`load_best_model_at_end=True` (to use the evaluation
             loss).

             If you set this value, :obj:`greater_is_better` will default to :obj:`True`. Don't forget to set it to
-            :obj:`False` if your metric is better when lower. greater_is_better (:obj:`bool`, `optional`) Use in
-            conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better models
-            should have a greater metric or not. Will default to:
+            :obj:`False` if your metric is better when lower.
+        greater_is_better (:obj:`bool`, `optional`):
+            Use in conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better
+            models should have a greater metric or not. Will default to:

             - :obj:`True` if :obj:`metric_for_best_model` is set to a value that isn't :obj:`"loss"` or
               :obj:`"eval_loss"`.
...
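A sketch combining the best-model arguments restyled above (the metric name assumes a compute_metrics function that reports "eval_accuracy"; evaluation_strategy="steps" ensures there are evaluations to compare):

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",            # evaluate periodically during training
    load_best_model_at_end=True,            # reload the best checkpoint when training ends
    metric_for_best_model="eval_accuracy",  # metric used to rank checkpoints
    greater_is_better=True,                 # higher accuracy is better; use False for loss-like metrics
)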
utils/style_doc.py

@@ -312,10 +312,11 @@ class DocstringStyler(CodeStyler):
     """Class to style docstrings that take the main method from `CodeStyler`."""

     def is_no_style_block(self, line):
+        if _re_textual_blocks.search(line) is not None:
+            return False
         if _re_example.search(line) is not None:
             return True
         return _re_code_block.search(line) is not None
-        # return super().is_no_style_block(line) is not None

     def is_comment_or_textual_block(self, line):
         if _re_return.search(line) is not None:
...