Unverified commit 2c0da780, authored by Stas Bekman, committed by GitHub

minor doc fixes (#5831)

* minor doc fixes

correct superclass name and small grammar fixes

* correct the instance name in the error message

It appears to be `BaseTokenizer` from looking at:

`from tokenizers.implementations import BaseTokenizer as BaseTokenizerFast`

and not `Tokenizer` as it currently says.
parent feeb956a
@@ -44,12 +44,12 @@ logger = logging.getLogger(__name__)
 class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     """ Base class for all fast tokenizers (wrapping HuggingFace tokenizers library).
-    Inherit from PreTrainedTokenizer.
+    Inherits from PreTrainedTokenizerBase.
-    Handle all the shared methods for tokenization and special tokens as well as methods
-    downloading/caching/loading pretrained tokenizers as well as adding tokens to the vocabulary.
+    Handles all the shared methods for tokenization and special tokens, as well as methods for
+    downloading/caching/loading pretrained tokenizers, as well as adding tokens to the vocabulary.
-    This class also contain the added tokens in a unified way on top of all tokenizers so we don't
+    This class also contains the added tokens in a unified way on top of all tokenizers so we don't
     have to handle the specific vocabulary augmentation methods of the various underlying
     dictionary structures (BPE, sentencepiece...).
@@ -95,7 +95,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     - ``mask_token``: (`Optional`) string: a masking token (e.g. when training a model with masked-language
       modeling). Will be associated to ``self.mask_token`` and ``self.mask_token_id``
     - ``additional_special_tokens``: (`Optional`) list: a list of additional special tokens.
-      Adding all special tokens here ensure they won't be split by the tokenization process.
+      Adding all special tokens here to ensure they won't be split by the tokenization process.
       Will be associated to ``self.additional_special_tokens`` and ``self.additional_special_tokens_ids``
@@ -105,7 +105,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
         if not isinstance(tokenizer, BaseTokenizerFast):
             raise ValueError(
-                "Tokenizer should be an instance of a Tokenizer " "provided by HuggingFace tokenizers library."
+                "Tokenizer should be an instance of a BaseTokenizer " "provided by HuggingFace tokenizers library."
             )
         self._tokenizer: BaseTokenizerFast = tokenizer
...
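The last hunk is the substance of the fix: the `isinstance` check requires a `BaseTokenizer` (imported as `BaseTokenizerFast`), so the error message should name that class rather than `Tokenizer`. A minimal sketch of the pattern, using stand-in classes rather than the actual HuggingFace `tokenizers` library:

```python
class BaseTokenizerFast:
    """Stand-in for `tokenizers.implementations.BaseTokenizer`
    (imported in transformers as `BaseTokenizerFast`)."""


class PreTrainedTokenizerFast:
    """Sketch of the wrapper: validates the wrapped tokenizer's type in
    __init__ and names the actually-required class in the error message."""

    def __init__(self, tokenizer):
        if not isinstance(tokenizer, BaseTokenizerFast):
            raise ValueError(
                "Tokenizer should be an instance of a BaseTokenizer "
                "provided by HuggingFace tokenizers library."
            )
        self._tokenizer = tokenizer


# A plain object fails the check; the expected type passes it.
try:
    PreTrainedTokenizerFast(object())
except ValueError as err:
    print(err)  # the message now names BaseTokenizer, matching the check

wrapped = PreTrainedTokenizerFast(BaseTokenizerFast())  # accepted
```

The point of the patch is simply that an error message should describe the check that raised it: a user who passes a raw `Tokenizer` needs to know the wrapper expects the `BaseTokenizer` implementation class.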