"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "9a91fa0ed1b8a6e4a977d81aac9e55fa5de760d6"
Commit 447afe9c authored by thomwolf

updating docstring for AutoModel

parent 84a3a968
@@ -59,6 +59,12 @@ if not six.PY2:
             fn.__doc__ = ''.join(docstr) + fn.__doc__
             return fn
         return docstring_decorator
+
+    def add_end_docstrings(*docstr):
+        def docstring_decorator(fn):
+            fn.__doc__ = fn.__doc__ + ''.join(docstr)
+            return fn
+        return docstring_decorator
 else:
     # Not possible to update class docstrings on python2
     def add_start_docstrings(*docstr):
@@ -66,6 +72,11 @@ else:
             return fn
         return docstring_decorator
+
+    def add_end_docstrings(*docstr):
+        def docstring_decorator(fn):
+            return fn
+        return docstring_decorator
 class PretrainedConfig(object):
     r""" Base class for all configuration classes.
...
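The new ``add_end_docstrings`` helper mirrors the existing ``add_start_docstrings``: on Python 3 it appends shared text to a function's docstring, while on Python 2 (where docstrings cannot be rewritten) it returns the function unchanged. A minimal sketch of the Python 3 behaviour; note that the decorated function must already have a docstring, since ``None + str`` would raise a ``TypeError``:

    def add_start_docstrings(*docstr):
        def docstring_decorator(fn):
            # Prepend the shared text to the function's own docstring.
            fn.__doc__ = ''.join(docstr) + fn.__doc__
            return fn
        return docstring_decorator

    def add_end_docstrings(*docstr):
        def docstring_decorator(fn):
            # Append the shared text after the function's own docstring.
            fn.__doc__ = fn.__doc__ + ''.join(docstr)
            return fn
        return docstring_decorator

    @add_start_docstrings("Shared intro. ")
    @add_end_docstrings(" Shared outro.")
    def example():
        """Function-specific body."""

    print(example.__doc__)
    # -> Shared intro. Function-specific body. Shared outro.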
@@ -69,15 +69,25 @@ class AutoTokenizer(object):
         - contains `roberta`: RobertaTokenizer (RoBERTa model)
     Params:
-        **pretrained_model_name_or_path**: either:
-            - a string with the `shortcut name` of a pre-trained model configuration to load from cache
-                or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
-            - a path to a `directory` containing a configuration file saved
-                using the `save_pretrained(save_directory)` method.
-            - a path or url to a saved configuration `file`.
-        **cache_dir**: (`optional`) string:
-            Path to a directory in which a downloaded pre-trained model
-            configuration should be cached if the standard cache should not be used.
+        pretrained_model_name_or_path: either:
+            - a string with the `shortcut name` of a predefined tokenizer to load from cache or download, e.g.: ``bert-base-uncased``.
+            - a path to a `directory` containing vocabulary files required by the tokenizer, for instance saved using the :func:`~pytorch_transformers.PreTrainedTokenizer.save_pretrained` method, e.g.: ``./my_model_directory/``.
+            - (not applicable to all derived classes) a path or url to a single saved vocabulary file if and only if the tokenizer only requires a single vocabulary file (e.g. Bert, XLNet), e.g.: ``./my_model_directory/vocab.txt``.
+        cache_dir: (`optional`) string:
+            Path to a directory in which downloaded predefined tokenizer vocabulary files should be cached if the standard cache should not be used.
+        force_download: (`optional`) boolean, default False:
+            Force a (re-)download of the vocabulary files and override the cached versions if they exist.
+        proxies: (`optional`) dict, default None:
+            A dictionary of proxy servers to use by protocol or endpoint, e.g.: ``{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}``.
+            The proxies are used on each request.
+        inputs: (`optional`) positional arguments: will be passed to the Tokenizer ``__init__`` method.
+        kwargs: (`optional`) keyword arguments: will be passed to the Tokenizer ``__init__`` method. Can be used to set special tokens like ``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``. See parameters in the docstring of :class:`~pytorch_transformers.PreTrainedTokenizer` for details.
     Examples::
...
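For illustration, a short usage sketch built only from the parameters documented above (the cache path is a hypothetical placeholder; downloading ``bert-base-uncased`` assumes network access):

    from pytorch_transformers import AutoTokenizer

    # Instantiate a tokenizer from a shortcut name: downloads and caches
    # the vocabulary files, or reuses the cached copies.
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

    # The same call exercising the optional arguments documented above.
    tokenizer = AutoTokenizer.from_pretrained(
        'bert-base-uncased',
        cache_dir='/tmp/tokenizer_cache/',   # hypothetical non-standard cache directory
        force_download=False,                # keep cached vocabulary files if they exist
        proxies={'http': 'foo.bar:3128'},    # proxies are used on each request
    )

    # A directory created earlier with tokenizer.save_pretrained('./my_model_directory/')
    # can be reloaded the same way:
    tokenizer = AutoTokenizer.from_pretrained('./my_model_directory/')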