dictionnary => dictionary

c717d385 · thomwolf · 27b0f86d · c717d385 · c717d385 · c717d385
Commit c717d385 authored Jul 26, 2019 by thomwolf
7 changed files
--- a/docs/source/model_doc/overview.rst
+++ b/docs/source/model_doc/overview.rst
@@ -96,7 +96,7 @@ where
  ``cache_dir`` can be an optional path to a specific directory to download and cache the pre-trained model weights. This option is useful in particular when you are using distributed training: to avoid concurrent access to the same weights you can set for example ``cache_dir='./pretrained_model_{}'.format(args.local_rank)`` (see the section on distributed training for more information).

 * ``from_tf``\ : should we load the weights from a locally saved TensorFlow checkpoint
-* ``state_dict``\ : an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+* ``state_dict``\ : an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
 * ``*inputs``\ , `**kwargs`: additional input for the specific Bert class (ex: num_labels for BertForSequenceClassification)

 ``Uncased`` means that the text has been lowercased before WordPiece tokenization, e.g., ``John Smith`` becomes ``john smith``. The Uncased model also strips out any accent markers. ``Cased`` means that the true case and accent markers are preserved. Typically, the Uncased model is better unless you know that case information is important for your task (e.g., Named Entity Recognition or Part-of-Speech tagging). For information about the Multilingual and Chinese model, see the `Multilingual README <https://github.com/google-research/bert/blob/master/multilingual.md>`__ or the original TensorFlow repository.

--- a/docs/source/serialization.rst
+++ b/docs/source/serialization.rst
@@ -40,7 +40,7 @@ where

 - `cache_dir` can be an optional path to a specific directory to download and cache the pre-trained model weights. This option is useful in particular when you are using distributed training: to avoid concurrent access to the same weights you can set for example `cache_dir='./pretrained_model_{}'.format(args.local_rank)` (see the section on distributed training for more information).
 - `from_tf`: should we load the weights from a locally saved TensorFlow checkpoint
- `state_dict`: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
+- `state_dict`: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
 - `*inputs`, `**kwargs`: additional input for the specific Bert class (ex: num_labels for BertForSequenceClassification)

 `Uncased` means that the text has been lowercased before WordPiece tokenization, e.g., `John Smith` becomes `john smith`. The Uncased model also strips out any accent markers. `Cased` means that the true case and accent markers are preserved. Typically, the Uncased model is better unless you know that case information is important for your task (e.g., Named Entity Recognition or Part-of-Speech tagging). For information about the Multilingual and Chinese model, see the [Multilingual README](https://github.com/google-research/bert/blob/master/multilingual.md) or the original TensorFlow repository.

--- a/hubconfs/bert_hubconf.py
+++ b/hubconfs/bert_hubconf.py
@@ -37,7 +37,7 @@ bert_docstring = """
                 checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models
                   will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
                    (collections.OrderedDict object) to use instead of Google
                    pre-trained models
        *inputs, **kwargs: additional input for the specific Bert class

--- a/hubconfs/gpt_hubconf.py
+++ b/hubconfs/gpt_hubconf.py
@@ -40,7 +40,7 @@ gpt_docstring = """
 				. a series of NumPy files containing OpenAI TensorFlow trained weights
 		from_tf: should we load the weights from a locally saved TensorFlow checkpoint
 		cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-		state_dict: an optional state dictionnary (collections.OrderedDict object)
+		state_dict: an optional state dictionary (collections.OrderedDict object)
 		        	to use instead of pre-trained models
 		*inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """

--- a/hubconfs/transformer_xl_hubconf.py
+++ b/hubconfs/transformer_xl_hubconf.py
@@ -23,7 +23,7 @@ transformer_xl_docstring = """
                . `model.chkpt` a TensorFlow checkpoint
        from_tf: should we load the weights from a locally saved TensorFlow checkpoint
        cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
        *inputs, **kwargs: additional input for the specific TransformerXL class
 """


--- a/pytorch_transformers/modeling_utils.py
+++ b/pytorch_transformers/modeling_utils.py
@@ -358,7 +358,7 @@ class PreTrainedModel(nn.Module):
                Dictionary of key, values to update the configuration object after loading.
                Can be used to override selected configuration parameters. E.g. ``output_attention=True``.

-               - If a configuration is provided with `config`, **kwargs will be directly passed
+               - If a configuration is providedictionaryfig`, **kwargs will be directly passed
                 to the underlying model's __init__ method.
               - If a configuration is not provided, **kwargs will be first passed to the pretrained
                 model configuration class loading function (`PretrainedConfig.from_pretrained`).
@@ -367,7 +367,7 @@ class PreTrainedModel(nn.Module):
                 Remaining keys that do not correspond to any configuration attribute will
                 be passed to the underlying model's __init__ function.

-        Examples::
+        Examples::dictionary

            >>> model = BertModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
            >>> model = BertModel.from_pretrained('./test/saved_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`

--- a/pytorch_transformers/tokenization_utils.py
+++ b/pytorch_transformers/tokenization_utils.py
@@ -37,7 +37,7 @@ class PreTrainedTokenizer(object):
            additional_special_tokens = []

        We defined an added_tokens_encoder to add new tokens to the vocabulary without having to handle the
-            specific vocabulary augmentation methods of the various underlying dictionnary structures (BPE, sentencepiece...).
+            specific vocabulary augmentation methods of the various underlying dictionary structures (BPE, sentencepiece...).
    """
    vocab_files_names = {}
    pretrained_vocab_files_map = {}
@@ -324,7 +324,7 @@ class PreTrainedTokenizer(object):


    def add_special_tokens(self, special_tokens_dict):
-        """ Add a dictionnary of special tokens (eos, pad, cls...) to the encoder and link them
+        """ Add a dictionary of special tokens (eos, pad, cls...) to the encoder and link them
            to class attributes. If the special tokens are not in the vocabulary, they are added
            to it and indexed starting from the last index of the current vocabulary.