Unverified Commit 721ee783 authored by Klaus Hipp, committed by GitHub

[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
parent 2418c64a
@@ -66,7 +66,7 @@ class Wav2Vec2BertConfig(PretrainedConfig):
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
feat_proj_dropout (`float`, *optional*, defaults to 0.0):
- The dropout probabilitiy for the feature projection.
+ The dropout probability for the feature projection.
final_dropout (`float`, *optional*, defaults to 0.1):
The dropout probability for the final projection layer of [`Wav2Vec2BertForCTC`].
layerdrop (`float`, *optional*, defaults to 0.1):
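Note: the parameter touched above is an ordinary keyword argument on the config class. As a rough illustration (a sketch, not part of the diff), this is how the documented dropout values would be set when building a `Wav2Vec2BertConfig`; the values below are arbitrary and chosen only for demonstration.

```python
from transformers import Wav2Vec2BertConfig

# Minimal sketch: arbitrary values, keyword names taken from the docstring above.
config = Wav2Vec2BertConfig(
    attention_dropout=0.1,   # dropout ratio for the attention probabilities
    feat_proj_dropout=0.05,  # dropout probability for the feature projection
    final_dropout=0.1,       # dropout before the final projection layer (CTC head)
    layerdrop=0.1,           # probability of skipping an encoder layer during training
)
print(config.feat_proj_dropout)
```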
@@ -84,7 +84,7 @@ class Wav2Vec2ConformerConfig(PretrainedConfig):
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
- The dropout probabilitiy for quantized feature encoder states.
+ The dropout probability for quantized feature encoder states.
conv_dim (`Tuple[int]` or `List[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
@@ -138,7 +138,7 @@ class Wav2Vec2ConformerConfig(PretrainedConfig):
contrastive_logits_temperature (`float`, *optional*, defaults to 0.1):
The temperature *kappa* in the contrastive loss.
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
- The dropout probabilitiy for the output of the feature encoder that's used by the quantizer.
+ The dropout probability for the output of the feature encoder that's used by the quantizer.
num_negatives (`int`, *optional*, defaults to 100):
Number of negative samples for the contrastive loss.
codevector_dim (`int`, *optional*, defaults to 256):
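The same pattern applies to the two `Wav2Vec2ConformerConfig` hunks above. A hedged sketch, mirroring the documented defaults rather than tuned values, of how `feat_quantizer_dropout` and the contrastive-loss settings would be passed:

```python
from transformers import Wav2Vec2ConformerConfig

# Sketch only: values copied from the documented defaults, not tuned.
config = Wav2Vec2ConformerConfig(
    feat_quantizer_dropout=0.0,                    # dropout on quantized feature encoder states
    conv_dim=(512, 512, 512, 512, 512, 512, 512),  # one entry per 1D conv layer in the feature encoder
    contrastive_logits_temperature=0.1,            # temperature kappa in the contrastive loss
    num_negatives=100,                             # negative samples per positive
    codevector_dim=256,
)
print(len(config.conv_dim), "convolutional layers in the feature encoder")
```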
@@ -55,7 +55,7 @@ class YolosConfig(PretrainedConfig):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
- The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+ The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
initializer_range (`float`, *optional*, defaults to 0.02):
@@ -53,7 +53,7 @@ class YosoConfig(PretrainedConfig):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
- The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+ The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (`int`, *optional*, defaults to 512):
@@ -580,7 +580,7 @@ def parse_args():
default=128,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
- " sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+ " sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
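The help string fixed above belongs to an `argparse` option in one of the example scripts. As a standalone sketch (a hypothetical, trimmed-down parser rather than the actual script), this is how `--pad_to_max_length` typically interacts with `--max_length`:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max_length",
    type=int,
    default=128,
    help=(
        "The maximum total input sequence length after tokenization. Sequences longer than this will be"
        " truncated, sequences shorter will be padded if `--pad_to_max_length` is passed."
    ),
)
parser.add_argument(
    "--pad_to_max_length",
    action="store_true",
    help="If passed, pad all samples to `max_length` instead of using dynamic padding.",
)

# Example invocation, parsed from an explicit argument list.
args = parser.parse_args(["--max_length", "256", "--pad_to_max_length"])
print(args.max_length, args.pad_to_max_length)
```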
@@ -217,7 +217,7 @@ Next the questionnaire will ask
Should we add # Copied from statements when creating the new modeling file?
```
- This is the intenal mechanism used in the library to make sure code copied from various modeling files stay consistent.
+ This is the internal mechanism used in the library to make sure code copied from various modeling files stay consistent.
If you plan to completely rewrite the modeling file, you should answer no, whereas if you just want to tweak one part
of the model, you should answer yes.
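The `# Copied from` statements mentioned here are plain comments that the repository's consistency checks pick up. A hedged illustration of what one looks like on a hypothetical model (the target name `BrandNewBert` is made up; the copied body mirrors `BertSelfOutput`):

```python
import torch
import torch.nn as nn


# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->BrandNewBert
class BrandNewBertSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        # Project, apply dropout, then add the residual and normalize.
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states
```

The comment tells the check that this class must stay byte-for-byte in sync with the named source class after the listed rename is applied, which is why answering "no" is recommended when the modeling file will be rewritten from scratch.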
@@ -56,7 +56,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
The non-linear activation function (function or string) in the encoder and pooler.
If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
- The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+ The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
The dropout ratio for the attention probabilities.
max_position_embeddings (`int`, *optional*, defaults to 512):
@@ -17,7 +17,7 @@
##
## It is to be used as such:
## Put '# To replace in: "FILE_PATH"' in order to indicate the contents will be copied in the file at path FILE_PATH
- ## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurence** of that line in the file at FILE_PATH
+ ## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurrence** of that line in the file at FILE_PATH
## Put '# Replace with:' followed by the lines containing the content to define the content
## End a statement with '# End.'. If starting a new statement without redefining the FILE_PATH, it will continue pasting
## content in that file.
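The directives documented in this template file form a small copy-and-replace notation written as Python comments. Going only by the description above, a hypothetical statement block (file path and replacement content invented purely for illustration) might read:

```python
# To replace in: "src/transformers/models/brand_new_bert/configuration_brand_new_bert.py"
# Below: "class BrandNewBertConfig(PretrainedConfig):"
# Replace with:
    model_type = "brand_new_bert"  # hypothetical content pasted below the anchor line
# End.
```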
@@ -166,7 +166,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(expected_src_tokens, batch["input_ids"][0])
self.assertEqual(expected_tgt_tokens, batch["labels"][0])
- # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+ # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()
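The comment fixed above notes that ByT5's tokenizer ships no vocabulary file, which is why the test rolls its own save/load check. A rough sketch of that round trip outside the test harness (the public `google/byt5-small` checkpoint is assumed here):

```python
import tempfile

from transformers import ByT5Tokenizer

# ByT5 operates directly on bytes, so there is no vocab file to serialize;
# save_pretrained/from_pretrained still round-trips the tokenizer configuration.
tokenizer = ByT5Tokenizer.from_pretrained("google/byt5-small")

with tempfile.TemporaryDirectory() as tmp_dir:
    tokenizer.save_pretrained(tmp_dir)
    reloaded = ByT5Tokenizer.from_pretrained(tmp_dir)

assert tokenizer("hello")["input_ids"] == reloaded("hello")["input_ids"]
```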
@@ -82,7 +82,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
)
self.assertEqual(32, targets["input_ids"].shape[1])
- # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+ # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()
@@ -367,10 +367,10 @@ class LlamaIntegrationTest(unittest.TestCase):
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
assert fast == [319, 4559, 1243, 2]
- slow_tokenzier = CodeLlamaTokenizer.from_pretrained(
+ slow_tokenizer = CodeLlamaTokenizer.from_pretrained(
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
)
- slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
+ slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
assert slow == [319, 4559, 1243, 2]
self.tokenizer.add_eos_token = False
@@ -360,10 +360,10 @@ class LlamaIntegrationTest(unittest.TestCase):
fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
assert fast == [319, 4559, 1243, 2]
- slow_tokenzier = LlamaTokenizer.from_pretrained(
+ slow_tokenizer = LlamaTokenizer.from_pretrained(
"hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
)
- slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
+ slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
assert slow == [319, 4559, 1243, 2]
self.tokenizer.add_eos_token = False
@@ -148,7 +148,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
)
self.assertEqual(32, targets["input_ids"].shape[1])
- # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+ # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
def test_save_and_load_tokenizer(self):
# safety check on max_len default value so we are sure the test works
tokenizers = self.get_tokenizers()
@@ -158,7 +158,7 @@ class Qwen2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(tokenizer_output_string, output_string)
def test_slow_tokenizer_decode_spaces_between_special_tokens_default(self):
- # Qwen2Tokenzier changes the default `spaces_between_special_tokens` in `decode` to False
+ # Qwen2Tokenizer changes the default `spaces_between_special_tokens` in `decode` to False
if not self.test_slow_tokenizer:
return
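The comment fixed here refers to `Qwen2Tokenizer` defaulting `spaces_between_special_tokens` to `False` in `decode`. A hedged sketch of passing the argument explicitly (a public Qwen2 checkpoint is assumed for illustration):

```python
from transformers import Qwen2Tokenizer

# Assumed public checkpoint carrying the Qwen2 vocab and merges files.
tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")

ids = tokenizer("Hello<|endoftext|>world")["input_ids"]

# With the changed default, the two calls below differ only when the
# argument is overridden explicitly.
print(tokenizer.decode(ids))                                        # default: no extra spaces
print(tokenizer.decode(ids, spaces_between_special_tokens=True))    # spaces around special tokens
```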