Unverified commit 364a5ae1, authored by Lysandre Debut, committed by GitHub

Refactor Code samples; Test code samples (#5036)



* Refactor code samples

* Test docstrings

* Style

* Tokenization examples

* Run rest of tests

* First step to testing source docs

* Style and BART comment

* Test the remainder of the code samples

* Style

* let to const

* Formatting fixes

* Ready for merge

* Fix fixture + Style

* Fix last tests

* Update docs/source/quicktour.rst
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Addressing @sgugger's comments + Fix MobileBERT in TF
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 315f464b
@@ -66,13 +66,15 @@ class MBartTokenizer(XLMRobertaTokenizer):
     The tokenization method is <tokens> <eos> <language code>. There is no BOS token.

     Examples::
-        from transformers import MBartTokenizer
-        tokenizer = MBartTokenizer.from_pretrained('mbart-large-en-ro')
-        example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
-        expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
-        batch: dict = tokenizer.prepare_translation_batch(
-            example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
-        )
+
+        >>> from transformers import MBartTokenizer
+        >>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-en-ro')
+        >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
+        >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
+        >>> batch: dict = tokenizer.prepare_translation_batch(
+        ...     example_english_phrase, src_lang="en_XX", tgt_lang="ro_RO", tgt_texts=expected_translation_romanian
+        ... )
     """

     vocab_files_names = {"vocab_file": "sentencepiece.bpe.model"}
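Rewriting the snippet with >>> / ... prompts is what makes it testable: doctest-style examples can be collected and executed automatically, which is the point of this PR (and presumably why the checkpoint name gains its facebook/ namespace, so the example actually runs). A minimal sketch of the mechanism, using Python's standard-library doctest module and a hypothetical add() function as a stand-in for the documented API:

    import doctest

    def add(a, b):
        """
        Examples::

            >>> add(1, 2)
            3
        """
        return a + b

    # doctest scans the docstrings in this module for ">>>" prompts, executes
    # each statement, and compares the printed output to the expected text.
    results = doctest.testmod()
    assert results.failed == 0, f"{results.failed} docstring example(s) failed"

The exact test harness added by this PR is not shown on this page; the sketch above only illustrates why the prompt markers matter.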
@@ -81,6 +81,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+    model_input_names = ["attention_mask"]

     def __init__(
         self,
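The new model_input_names attribute declares which optional encodings the tokenizer should produce alongside input_ids; for Reformer that is only attention_mask (no token_type_ids). A minimal sketch of the effect, assuming the public google/reformer-crime-and-punishment checkpoint is available:

    from transformers import ReformerTokenizer

    # Assumed checkpoint for illustration; any Reformer vocabulary behaves the same.
    tokenizer = ReformerTokenizer.from_pretrained("google/reformer-crime-and-punishment")
    encoding = tokenizer.encode_plus("Hello world")

    # Because model_input_names is ["attention_mask"], the encoding carries an
    # attention mask but no token_type_ids, matching what the model accepts.
    print(sorted(encoding.keys()))  # ['attention_mask', 'input_ids']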