"web/vscode:/vscode.git/clone" did not exist on "57926635e8d84ae9eea4a0416cc75e363f5ede45"
Unverified Commit 011cc0be authored by Sylvain Gugger, committed by GitHub

Fix all sphinx warnings (#5068)

parent af497b56
......@@ -530,6 +530,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
config: (`optional`) one of:
- an instance of a class derived from :class:`~transformers.PretrainedConfig`, or
- a string valid as input to :func:`~transformers.PretrainedConfig.from_pretrained()`
Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
- the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
- the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
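For illustration, a minimal sketch of both paths (the `bert-base-uncased` shortcut name is only an example):

```python
from transformers import BertConfig, BertModel

# Config loaded automatically from the shortcut name of a library-provided model
model = BertModel.from_pretrained("bert-base-uncased")

# Or pass an explicit PretrainedConfig instance, which overrides the automatic one
config = BertConfig.from_pretrained("bert-base-uncased", output_attentions=True)
model = BertModel.from_pretrained("bert-base-uncased", config=config)
```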
......
......@@ -97,13 +97,13 @@ def create_optimizer(
class AdamWeightDecay(tf.keras.optimizers.Adam):
"""Adam enables L2 weight decay and clip_by_global_norm on gradients.
Just adding the square of the weights to the loss function is *not* the
correct way of using L2 regularization/weight decay with Adam, since that will
interact with the m and v parameters in strange ways.
Instead we want to decay the weights in a manner that doesn't interact with
the m/v parameters. This is equivalent to adding the square of the weights to
the loss with plain (non-momentum) SGD.
"""
def __init__(
self,
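As a usage sketch of the decoupled decay described above (argument names such as ``weight_decay_rate`` and ``exclude_from_weight_decay`` follow this module, but check your installed version):

```python
from transformers import AdamWeightDecay

# Weight decay is applied directly to the weights rather than added to the loss,
# so it does not interact with Adam's m/v moment estimates.
optimizer = AdamWeightDecay(
    learning_rate=3e-5,
    weight_decay_rate=0.01,
    # Bias and LayerNorm weights are commonly excluded from decay
    exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
)
```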
......@@ -198,11 +198,11 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
# Extracted from https://github.com/OpenNMT/OpenNMT-tf/blob/master/opennmt/optimizers/utils.py
class GradientAccumulator(object):
"""Gradient accumulation utility.
When used with a distribution strategy, the accumulator should be called in a
replica context. Gradients will be accumulated locally on each replica and
without synchronization. Users should then call ``.gradients``, scale the
gradients if required, and pass the result to ``apply_gradients``.
"""
# We use the ON_READ synchronization policy so that no synchronization is
# performed on assignment. To get the value, we call .value() which returns the
......
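A rough sketch of the accumulate/scale/apply cycle described above (the training-loop helpers are illustrative; only ``GradientAccumulator`` itself comes from the library):

```python
import tensorflow as tf
from transformers import GradientAccumulator

accumulator = GradientAccumulator()

def accumulate(model, loss_fn, features, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(labels, model(features, training=True))
    # Accumulated locally on each replica, without synchronization
    accumulator(tape.gradient(loss, model.trainable_variables))

def apply(model, optimizer, accumulation_steps):
    # Read .gradients, scale if required, pass to apply_gradients, then reset
    grads = [g / accumulation_steps for g in accumulator.gradients]
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    accumulator.reset()
```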
......@@ -323,6 +323,7 @@ class Pipeline(_ScikitCompat):
Base class implementing pipelined operations.
Pipeline workflow is defined as a sequence of the following operations:
Input -> Tokenization -> Model Inference -> Post-Processing (Task dependent) -> Output
Pipeline supports running on CPU or GPU through the device argument. Users can specify
......
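For instance, the workflow above collapses into a single call through the ``pipeline`` factory (``device=-1`` runs on CPU, ``device=0`` on the first GPU):

```python
from transformers import pipeline

# Input -> Tokenization -> Model Inference -> Post-Processing -> Output
classifier = pipeline("sentiment-analysis", device=-1)
result = classifier("Pipelines wrap the whole workflow in one call.")
# result is a list of dicts with 'label' and 'score' keys
```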
......@@ -103,6 +103,7 @@ class AutoTokenizer:
The `from_pretrained()` method takes care of returning the correct tokenizer class instance
based on the `model_type` property of the config object, or when it's missing,
falling back to using pattern matching on the `pretrained_model_name_or_path` string:
- `t5`: T5Tokenizer (T5 model)
- `distilbert`: DistilBertTokenizer (DistilBert model)
- `albert`: AlbertTokenizer (ALBERT model)
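A short sketch of that dispatch (the checkpoint names are common public ones, used here only as examples):

```python
from transformers import AutoTokenizer

# "t5" in the name maps to T5Tokenizer, "distilbert" to DistilBertTokenizer, etc.
t5_tokenizer = AutoTokenizer.from_pretrained("t5-small")
distilbert_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
```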
......@@ -136,6 +137,7 @@ class AutoTokenizer:
The tokenizer class to instantiate is selected
based on the `model_type` property of the config object, or when it's missing,
falling back to using pattern matching on the `pretrained_model_name_or_path` string:
- `t5`: T5Tokenizer (T5 model)
- `distilbert`: DistilBertTokenizer (DistilBert model)
- `albert`: AlbertTokenizer (ALBERT model)
......
......@@ -1408,7 +1408,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
The sequence or batch of sequences to be encoded.
Each sequence can be a string or a list of strings (pre-tokenized string).
If the sequences are provided as list of strings (pretokenized), you must set `is_pretokenized=True`
(to lift the ambiguity with a batch of sequences)
text_pair (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
The sequence or batch of sequences to be encoded.
Each sequence can be a string or a list of strings (pre-tokenized string).
......
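To make the ``is_pretokenized`` distinction concrete, a hedged sketch (the flag name follows the docstring above and may differ in other library versions):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# A plain string is tokenized internally
encoding = tokenizer("Hello world")

# A list of strings is ambiguous: it could be one pre-tokenized sequence
# or a batch of one-word sequences. is_pretokenized=True lifts that ambiguity.
encoding = tokenizer(["Hello", "world"], is_pretokenized=True)
```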