"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "e840aa67e89a728a2f8512dd7d6905d8d13e6f7f"
Unverified commit c42596bc, authored by Sylvain Gugger, committed by GitHub

Doc styling fixes (#8074)

* Fix a few docstrings

* More fixes

* Styling
parent 1496931b
@@ -66,30 +66,31 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):
     This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
     methods. Users should refer to this superclass for more information regarding those methods.

-    vocab_file (:obj:`str`): `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm`
-    extension) that contains the vocabulary necessary to instantiate a tokenizer. bos_token (:obj:`str`, `optional`,
-    defaults to :obj:`"<s>"`): The beginning of sequence token that was used during pretraining. Can be used a sequence
-    classifier token.
+    Args:
+        vocab_file (:obj:`str`):
+            `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
+            contains the vocabulary necessary to instantiate a tokenizer.
+        bos_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
+            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.

             .. note::

-                When building a sequence using special tokens, this is not the token that is used for the beginning
-                of sequence. The token used is the :obj:`cls_token`.
+                When building a sequence using special tokens, this is not the token that is used for the beginning of
+                sequence. The token used is the :obj:`cls_token`.
         eos_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
             The end of sequence token.

             .. note::

-                When building a sequence using special tokens, this is not the token that is used for the end
-                of sequence. The token used is the :obj:`sep_token`.
+                When building a sequence using special tokens, this is not the token that is used for the end of
+                sequence. The token used is the :obj:`sep_token`.
         sep_token (:obj:`str`, `optional`, defaults to :obj:`"</s>"`):
-            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
-            for sequence classification or for a text and a question for question answering.
-            It is also used as the last token of a sequence built with special tokens.
+            The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences for
+            sequence classification or for a text and a question for question answering. It is also used as the last
+            token of a sequence built with special tokens.
         cls_token (:obj:`str`, `optional`, defaults to :obj:`"<s>"`):
-            The classifier token which is used when doing sequence classification (classification of the whole
-            sequence instead of per-token classification). It is the first token of the sequence when built with
-            special tokens.
+            The classifier token which is used when doing sequence classification (classification of the whole sequence
+            instead of per-token classification). It is the first token of the sequence when built with special tokens.
         unk_token (:obj:`str`, `optional`, defaults to :obj:`"<unk>"`):
             The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
             token instead.
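As a quick illustration of the cls/bos distinction these notes document, here is a minimal usage sketch; the `camembert-base` checkpoint name is an assumption for illustration, not part of this diff:

```python
from transformers import CamembertTokenizerFast

# Minimal sketch (assumes the standard "camembert-base" checkpoint is available).
tok = CamembertTokenizerFast.from_pretrained("camembert-base")

# bos_token, cls_token and sep_token all exist, but encoding uses cls/sep:
ids = tok("Le camembert est un fromage.")["input_ids"]
tokens = tok.convert_ids_to_tokens(ids)
print(tokens[0])   # "<s>"  -> the cls_token, not the bos_token
print(tokens[-1])  # "</s>" -> the sep_token, not the eos_token
```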
@@ -129,10 +129,10 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
 CUSTOM_DPR_READER_DOCSTRING = r"""
-    Return a dictionary with the token ids of the input strings and other information to give to
-    :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
-    sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of
-    size :obj:`(n_passages, sequence_length)` with the format:
+    Return a dictionary with the token ids of the input strings and other information to give to
+    :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
+    sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
+    :obj:`(n_passages, sequence_length)` with the format:

     ::

@@ -189,12 +189,12 @@ CUSTOM_DPR_READER_DOCSTRING = r"""
             `What are attention masks? <../glossary.html#attention-mask>`__

-    Return:
+    Returns:
         :obj:`Dict[str, List[List[int]]]`: A dictionary with the following keys:

         - ``input_ids``: List of token ids to be fed to a model.
         - ``attention_mask``: List of indices specifying which tokens should be attended to by the model.
-    """
+"""

 @add_start_docstrings(CUSTOM_DPR_READER_DOCSTRING)
@@ -132,12 +132,12 @@ DPRReaderOutput = collections.namedtuple("DPRReaderOutput", ["start_logits", "en
 CUSTOM_DPR_READER_DOCSTRING = r"""
-    Return a dictionary with the token ids of the input strings and other information to give to
-    :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
-    sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of
-    size :obj:`(n_passages, sequence_length)` with the format:
+    Return a dictionary with the token ids of the input strings and other information to give to
+    :obj:`.decode_best_spans`. It converts the strings of a question and different passages (title and text) in a
+    sequence of IDs (integers), using the tokenizer and vocabulary. The resulting :obj:`input_ids` is a matrix of size
+    :obj:`(n_passages, sequence_length)` with the format:

-        [CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>
+    [CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>

     Args:
         questions (:obj:`str` or :obj:`List[str]`):

@@ -195,7 +195,7 @@ CUSTOM_DPR_READER_DOCSTRING = r"""
         - ``input_ids``: List of token ids to be fed to a model.
         - ``attention_mask``: List of indices specifying which tokens should be attended to by the model.
-    """
+"""

 @add_start_docstrings(CUSTOM_DPR_READER_DOCSTRING)
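To make the documented call concrete, a hedged usage sketch follows; the checkpoint name and the toy passages are assumptions for illustration only:

```python
from transformers import DPRReaderTokenizerFast

# Assumed checkpoint name for illustration.
tokenizer = DPRReaderTokenizerFast.from_pretrained("facebook/dpr-reader-single-nq-base")

# One question scored against two passages (title + text pairs).
encoded = tokenizer(
    questions="What is the capital of France?",
    titles=["Paris", "France"],
    texts=["Paris is the capital of France.", "France is a country in Europe."],
    padding=True,
    return_tensors="pt",
)

# input_ids is a (n_passages, sequence_length) matrix laid out as:
#   [CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>
print(encoded["input_ids"].shape)  # torch.Size([2, <sequence_length>])
```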
@@ -194,18 +194,21 @@ class Trainer:
             The function may have zero argument, or a single one containing the optuna/Ray Tune trial object, to be
             able to choose different architectures according to hyper parameters (such as layer count, sizes of inner
-            layers, dropout probabilities etc). compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
+            layers, dropout probabilities etc).
+        compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
             The function that will be used to compute metrics at evaluation. Must take a
-            :class:`~transformers.EvalPrediction` and return a dictionary string to metric values. callbacks (List of
-            :obj:`~transformers.TrainerCallback`, `optional`): A list of callbacks to customize the training loop. Will
-            add those to the list of default callbacks detailed in :doc:`here <callback>`.
+            :class:`~transformers.EvalPrediction` and return a dictionary string to metric values.
+        callbacks (List of :obj:`~transformers.TrainerCallback`, `optional`):
+            A list of callbacks to customize the training loop. Will add those to the list of default callbacks
+            detailed in :doc:`here <callback>`.

             If you want to remove one of the default callbacks used, use the :meth:`Trainer.remove_callback` method.
-            optimizers (:obj:`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR`, `optional`): A tuple
+        optimizers (:obj:`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR`, `optional`): A tuple
             containing the optimizer and the scheduler to use. Will default to an instance of
             :class:`~transformers.AdamW` on your model and a scheduler given by
-            :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`. kwargs: Deprecated keyword
-            arguments.
+            :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
+        kwargs:
+            Deprecated keyword arguments.
     """

     def __init__(
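A sketch of how the arguments described in this docstring fit together; `model`, `train_ds` and `eval_ds` are placeholders, and the custom callback is hypothetical:

```python
import numpy as np
from transformers import Trainer, TrainingArguments, TrainerCallback

def compute_metrics(eval_pred):
    # Takes an EvalPrediction and returns a dict mapping metric names to values.
    preds = np.argmax(eval_pred.predictions, axis=-1)
    return {"accuracy": float((preds == eval_pred.label_ids).mean())}

class EpochLogger(TrainerCallback):
    # Hypothetical callback, appended to the default callback list.
    def on_epoch_end(self, args, state, control, **kwargs):
        print(f"finished epoch {state.epoch}")

trainer = Trainer(
    model=model,                       # placeholder: any PreTrainedModel
    args=TrainingArguments(output_dir="out"),
    train_dataset=train_ds,            # placeholder dataset
    eval_dataset=eval_ds,              # placeholder dataset
    compute_metrics=compute_metrics,
    callbacks=[EpochLogger()],
    # optimizers is omitted: it defaults to AdamW plus a linear warmup schedule.
)
```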
@@ -144,29 +144,31 @@ class TrainingArguments:
             If using `nlp.Dataset` datasets, whether or not to automatically remove the columns unused by the model
             forward method.
-            (Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.) label_names
-            (:obj:`List[str]`, `optional`): The list of keys in your dictionary of inputs that correspond to the
-            labels.
+            (Note that this behavior is not implemented for :class:`~transformers.TFTrainer` yet.)
+        label_names (:obj:`List[str]`, `optional`):
+            The list of keys in your dictionary of inputs that correspond to the labels.

             Will eventually default to :obj:`["labels"]` except if the model used is one of the
             :obj:`XxxForQuestionAnswering` in which case it will default to :obj:`["start_positions",
-            "end_positions"]`. load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or
-            not to load the best model found during training at the end of training.
+            "end_positions"]`.
+        load_best_model_at_end (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            Whether or not to load the best model found during training at the end of training.

             .. note::

                 When set to :obj:`True`, the parameters :obj:`save_steps` will be ignored and the model will be saved
                 after each evaluation.
-        metric_for_best_model (:obj:`str`, `optional`)
+        metric_for_best_model (:obj:`str`, `optional`):
             Use in conjunction with :obj:`load_best_model_at_end` to specify the metric to use to compare two different
             models. Must be the name of a metric returned by the evaluation with or without the prefix :obj:`"eval_"`.
             Will default to :obj:`"loss"` if unspecified and :obj:`load_best_model_at_end=True` (to use the evaluation
             loss).

             If you set this value, :obj:`greater_is_better` will default to :obj:`True`. Don't forget to set it to
-            :obj:`False` if your metric is better when lower. greater_is_better (:obj:`bool`, `optional`) Use in
-            conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better models
-            should have a greater metric or not. Will default to:
+            :obj:`False` if your metric is better when lower.
+        greater_is_better (:obj:`bool`, `optional`):
+            Use in conjunction with :obj:`load_best_model_at_end` and :obj:`metric_for_best_model` to specify if better
+            models should have a greater metric or not. Will default to:

             - :obj:`True` if :obj:`metric_for_best_model` is set to a value that isn't :obj:`"loss"` or
               :obj:`"eval_loss"`.
@@ -312,10 +312,11 @@ class DocstringStyler(CodeStyler):
     """Class to style docstrings that take the main method from `CodeStyler`."""
     def is_no_style_block(self, line):
         if _re_textual_blocks.search(line) is not None:
             return False
         if _re_example.search(line) is not None:
             return True
         return _re_code_block.search(line) is not None
+        # return super().is_no_style_block(line) is not None

     def is_comment_or_textual_block(self, line):
         if _re_return.search(line) is not None:
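For readers without the rest of the script, here is a self-contained sketch of the decision above; the regex definitions are assumptions for illustration, since the real patterns are defined elsewhere in the file:

```python
import re

# Assumed shapes of the module-level patterns used by is_no_style_block (illustrative only).
_re_code_block = re.compile(r"^\s*\.\.\s+code-block::")
_re_textual_blocks = re.compile(r"^\s*\.\.\s+(note|warning)::")
_re_example = re.compile(r"^\s*Examples?::")

def is_no_style_block(line):
    # Same logic as DocstringStyler.is_no_style_block above: textual blocks are
    # restyled, while example sections and code blocks are left untouched.
    if _re_textual_blocks.search(line) is not None:
        return False
    if _re_example.search(line) is not None:
        return True
    return _re_code_block.search(line) is not None

print(is_no_style_block("    Examples::"))             # True: example code is not rewrapped
print(is_no_style_block("    .. note::"))              # False: notes still get rewrapped
print(is_no_style_block("    .. code-block:: python")) # True: code blocks are left as-is
```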