Unverified Commit 7a26307e authored by Sylvain Gugger, committed by GitHub

Fixes for the documentation (#13361)

parent 0b8c84e1
@@ -87,7 +87,7 @@ class PretrainedConfig(PushToHubMixin):
             Whether cross-attention layers should be added to the model. Note, this option is only relevant for models
             that can be used as decoder models within the `:class:~transformers.EncoderDecoderModel` class, which
             consists of all models in ``AUTO_MODELS_FOR_CAUSAL_LM``.
-        tie_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`)
+        tie_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether all encoder weights should be tied to their equivalent decoder weights. This requires the encoder
             and decoder model to have the exact same parameter names.
         prune_heads (:obj:`Dict[int, List[int]]`, `optional`, defaults to :obj:`{}`):
...
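The `tie_encoder_decoder` option documented in this hunk is an ordinary keyword argument on any config class. A minimal sketch of setting it, not part of this commit and with the checkpoint name chosen purely for illustration:

    from transformers import BertConfig

    # Illustrative only: enable encoder/decoder weight tying on a config.
    # This only has an effect when the model is used as part of an
    # EncoderDecoderModel and both sides share identical parameter names.
    config = BertConfig.from_pretrained("bert-base-uncased", tie_encoder_decoder=True)
    print(config.tie_encoder_decoder)  # True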
@@ -16,6 +16,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .data_collator import (
+    DataCollatorForLanguageModeling,
+    DataCollatorForPermutationLanguageModeling,
+    DataCollatorForSeq2Seq,
+    DataCollatorForSOP,
+    DataCollatorForTokenClassification,
+    DataCollatorForWholeWordMask,
+    DataCollatorWithPadding,
+    default_data_collator,
+)
 from .metrics import glue_compute_metrics, xnli_compute_metrics
 from .processors import (
     DataProcessor,
...
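The added lines above only re-export the data collators inside the `transformers.data` subpackage. If that reading of the package layout is right, both import paths below should resolve to the same class after this change:

    from transformers import DataCollatorWithPadding              # existing top-level export
    from transformers.data import DataCollatorWithPadding as Dc   # re-export added in this diff

    assert DataCollatorWithPadding is Dc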
@@ -50,7 +50,7 @@ class HfDeepSpeedConfig:
     values: ``"auto"``. Without this special logic the DeepSpeed configuration is not modified in any way.
     Args:
-        config_file_or_dict (:obj:`Union[str, Dict]`) - path to DeepSpeed config file or dict.
+        config_file_or_dict (:obj:`Union[str, Dict]`): path to DeepSpeed config file or dict.
     """
...
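Since `config_file_or_dict` accepts either a file path or an already-parsed dict, a rough usage sketch could look like the following (this assumes the `deepspeed` package is installed; the ZeRO settings are placeholders, not values from this commit):

    from transformers.deepspeed import HfDeepSpeedConfig

    # Passing a plain dict instead of a path to a JSON config file.
    ds_config = {
        "train_micro_batch_size_per_gpu": 8,
        "zero_optimization": {"stage": 2},
    }
    hf_ds_config = HfDeepSpeedConfig(ds_config)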
@@ -1535,10 +1535,14 @@ def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float("In
     Args:
         logits: logits distribution shape (batch size, vocabulary size)
-        if top_k > 0: keep only top k tokens with highest probability (top-k filtering).
-        if top_p < 1.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
-            Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
-        Make sure we keep at least min_tokens_to_keep per batch example in the output
+        top_k (:obj:`int`, `optional`, defaults to 0):
+            If > 0, only keep the top k tokens with highest probability (top-k filtering)
+        top_p (:obj:`float`, `optional`, defaults to 1.0):
+            If < 1.0, only keep the top tokens with cumulative probability >= top_p (nucleus filtering). Nucleus
+            filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
+        min_tokens_to_keep (:obj:`int`, `optional`, defaults to 1):
+            Minimum number of tokens we keep per batch example in the output.
     From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
     """
     logits_shape = shape_list(logits)
...
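As a sanity check of what the rewritten docstring describes, here is a small NumPy sketch of the same top-k / nucleus filtering idea. It is an illustrative re-implementation for a single 1-D distribution, not the library function, and it ignores the batched `min_tokens_to_keep` handling:

    import numpy as np

    def toy_top_k_top_p(logits, top_k=0, top_p=1.0, filter_value=-np.inf):
        """Illustrative 1-D version of the filtering described above."""
        logits = logits.copy()
        if top_k > 0:
            # Mask everything below the k-th largest logit (top-k filtering).
            kth = np.sort(logits)[-top_k]
            logits[logits < kth] = filter_value
        if top_p < 1.0:
            # Keep the smallest set of tokens whose cumulative probability
            # reaches top_p (nucleus filtering, Holtzman et al. 2019).
            order = np.argsort(logits)[::-1]
            probs = np.exp(logits[order] - np.max(logits))
            probs /= probs.sum()
            cutoff = np.searchsorted(np.cumsum(probs), top_p) + 1  # keep at least one token
            logits[order[cutoff:]] = filter_value
        return logits

    # With top_p=0.9 the 0.05-probability token is masked, the rest survive.
    print(toy_top_k_top_p(np.log(np.array([0.5, 0.3, 0.15, 0.05])), top_p=0.9))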
@@ -2557,10 +2557,14 @@ def top_k_top_p_filtering(
     Args:
         logits: logits distribution shape (batch size, vocabulary size)
-        if top_k > 0: keep only top k tokens with highest probability (top-k filtering).
-        if top_p < 1.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
-            Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
-        Make sure we keep at least min_tokens_to_keep per batch example in the output
+        top_k (:obj:`int`, `optional`, defaults to 0):
+            If > 0, only keep the top k tokens with highest probability (top-k filtering)
+        top_p (:obj:`float`, `optional`, defaults to 1.0):
+            If < 1.0, only keep the top tokens with cumulative probability >= top_p (nucleus filtering). Nucleus
+            filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
+        min_tokens_to_keep (:obj:`int`, `optional`, defaults to 1):
+            Minimum number of tokens we keep per batch example in the output.
     From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
     """
     if top_k > 0:
...
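The PyTorch version of this helper is exported at the top level of the library, so a quick usage sketch might look like the following (assuming `torch` and a reasonably recent `transformers` are installed; the logits are made up):

    import torch
    from transformers import top_k_top_p_filtering

    # A tiny batch of 1 with a 5-token "vocabulary".
    logits = torch.tensor([[2.0, 1.0, 0.5, -1.0, -3.0]])

    filtered = top_k_top_p_filtering(logits.clone(), top_k=3, top_p=0.95)
    probs = torch.softmax(filtered, dim=-1)        # masked tokens get probability 0
    next_token = torch.multinomial(probs, num_samples=1)
    print(filtered, next_token)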
@@ -78,7 +78,7 @@ class Speech2TextConfig(PretrainedConfig):
             Whether or not the model should return the last key/values attentions (not used by all models).
         max_source_positions (:obj:`int`, `optional`, defaults to 6000):
             The maximum sequence length of log-mel filter-bank features that this model might ever be used with.
-        max_target_positions: (:obj:`int`, `optional`, defaults to 1024):
+        max_target_positions (:obj:`int`, `optional`, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
         num_conv_layers (:obj:`int`, `optional`, defaults to 2):
...
@@ -95,7 +95,7 @@ class Speech2TextConfig(PretrainedConfig):
         input_channels (:obj:`int`, `optional`, defaults to 1):
             An integer specifying number of input channels of the input feature vector.
         Example::
             >>> from transformers import Speech2TextModel, Speech2TextConfig
...
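Continuing the `Example::` block visible in the hunk, a typical config/model round-trip could look roughly like this (the override value is illustrative and not part of this commit):

    from transformers import Speech2TextConfig, Speech2TextModel

    # Build a config from the documented defaults, overriding a single value.
    configuration = Speech2TextConfig(max_target_positions=1024)
    model = Speech2TextModel(configuration)   # randomly initialised, shapes taken from the config
    configuration = model.config              # the config can be read back from the model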
@@ -306,10 +306,10 @@ def pipeline(
             - :obj:`"feature-extraction"`: will return a :class:`~transformers.FeatureExtractionPipeline`.
             - :obj:`"text-classification"`: will return a :class:`~transformers.TextClassificationPipeline`.
-            - :obj:`"sentiment-analysis"`: (alias of :obj:`"text-classification") will return a
+            - :obj:`"sentiment-analysis"`: (alias of :obj:`"text-classification"`) will return a
               :class:`~transformers.TextClassificationPipeline`.
             - :obj:`"token-classification"`: will return a :class:`~transformers.TokenClassificationPipeline`.
-            - :obj:`"ner"` (alias of :obj:`"token-classification"): will return a
+            - :obj:`"ner"` (alias of :obj:`"token-classification"`): will return a
               :class:`~transformers.TokenClassificationPipeline`.
             - :obj:`"question-answering"`: will return a :class:`~transformers.QuestionAnsweringPipeline`.
             - :obj:`"fill-mask"`: will return a :class:`~transformers.FillMaskPipeline`.
...
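Both names fixed in this hunk are task aliases, so either spelling can be passed to `pipeline()`. A rough usage sketch (the exact outputs depend on whichever default checkpoints get downloaded):

    from transformers import pipeline

    # "sentiment-analysis" is an alias of "text-classification",
    # "ner" is an alias of "token-classification".
    classifier = pipeline("sentiment-analysis")
    print(classifier("This documentation fix is very welcome."))

    tagger = pipeline("ner")
    print(tagger("Hugging Face is based in New York City."))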