Unverified Commit 95113d13 authored by Yulv-git, committed by GitHub

Fix some typos. (#17560)



* Fix some typos.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* Fix typo.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* make fixup.
parent ad28ca29
@@ -231,7 +231,7 @@ def _compute_mask_indices(
 )
 spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
-# add offset to the starting indexes so that that indexes now create a span
+# add offset to the starting indexes so that indexes now create a span
 offsets = np.arange(mask_length)[None, None, :]
 offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
     batch_size, max_num_masked_span * mask_length
......
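For context, a minimal NumPy sketch of what the code around this hunk does: each mask start index is repeated `mask_length` times and the broadcast offsets `0..mask_length-1` are added, so every start index expands into a full contiguous span. The shapes and start positions below are made-up illustrations, not values from the model code.

```python
import numpy as np

# Illustrative values only: one batch item, two masked spans of length 3,
# starting at positions 2 and 7.
batch_size, max_num_masked_span, mask_length = 1, 2, 3
start_idxs = np.array([[2, 7]])

# Repeat each start index mask_length times (the reshape in the hunk above).
spec_aug_mask_idxs = np.broadcast_to(
    start_idxs[:, :, None], (batch_size, max_num_masked_span, mask_length)
).reshape(batch_size, max_num_masked_span * mask_length)

# Offsets 0..mask_length-1, broadcast to the same flattened shape.
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
    batch_size, max_num_masked_span * mask_length
)

# Adding the offsets turns every start index into a full span of indices.
print(spec_aug_mask_idxs + offsets)  # [[2 3 4 7 8 9]]
```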
@@ -82,10 +82,10 @@ class WavLMConfig(PretrainedConfig):
     feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
 conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
     A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
-    of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
+    of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
 conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
     A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
-    length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
+    length of *conv_kernel* defines the number of convolutional layers and has to match the length of
     *conv_dim*.
 conv_bias (`bool`, *optional*, defaults to `False`):
     Whether the 1D convolutional layers have a bias.
......
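To illustrate the constraint this docstring describes: `conv_dim`, `conv_stride`, and `conv_kernel` must all have the same length, since each position describes one convolutional layer of the feature encoder. A small sketch; the `conv_dim` values are illustrative, the other two tuples are the defaults quoted above.

```python
from transformers import WavLMConfig

# All three tuples have length 7, so the feature encoder has 7 conv layers.
config = WavLMConfig(
    conv_dim=(512, 512, 512, 512, 512, 512, 512),
    conv_stride=(5, 2, 2, 2, 2, 2, 2),
    conv_kernel=(10, 3, 3, 3, 3, 3, 3),
)
```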
@@ -183,7 +183,7 @@ def _compute_mask_indices(
 )
 spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
-# add offset to the starting indexes so that that indexes now create a span
+# add offset to the starting indexes so that indexes now create a span
 offsets = np.arange(mask_length)[None, None, :]
 offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
     batch_size, max_num_masked_span * mask_length
......
@@ -1069,7 +1069,7 @@ class YolosLoss(nn.Module):
 # Retrieve the matching between the outputs of the last layer and the targets
 indices = self.matcher(outputs_without_aux, targets)
-# Compute the average number of target boxes accross all nodes, for normalization purposes
+# Compute the average number of target boxes across all nodes, for normalization purposes
 num_boxes = sum(len(t["class_labels"]) for t in targets)
 num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
 # (Niels): comment out function below, distributed training to be added
......
@@ -487,7 +487,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
 def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str):
     """
-    Fill the input_or_ouputs mapping with past_key_values dynamic axes considering.
+    Fill the input_or_outputs mapping with past_key_values dynamic axes considering.
     Args:
         inputs_or_outputs: The mapping to fill.
......
@@ -412,8 +412,8 @@ class TrainingArguments:
     down the training and evaluation speed.
 push_to_hub (`bool`, *optional*, defaults to `False`):
     Whether or not to push the model to the Hub every time the model is saved. If this is activated,
-    `output_dir` will begin a git directory synced with the the repo (determined by `hub_model_id`) and the
-    content will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
+    `output_dir` will begin a git directory synced with the repo (determined by `hub_model_id`) and the content
+    will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
     [`~Trainer.save_model`] will also trigger a push.
     <Tip warning={true}>
@@ -434,7 +434,7 @@ class TrainingArguments:
     `"organization_name/model"`. Will default to `user_name/output_dir_name` with *output_dir_name* being the
     name of `output_dir`.
-    Will default to to the name of `output_dir`.
+    Will default to the name of `output_dir`.
 hub_strategy (`str` or [`~trainer_utils.HubStrategy`], *optional*, defaults to `"every_save"`):
     Defines the scope of what is pushed to the Hub and when. Possible values are:
......
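A hedged sketch of the behaviour this docstring describes: with `push_to_hub=True`, `output_dir` is synced with a Hub repo whose name comes from `hub_model_id` (defaulting to the name of `output_dir`), and content is pushed whenever a save is triggered by `save_strategy`. The directory and repo names below are hypothetical.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="my-finetuned-model",            # becomes a git directory synced with the Hub repo
    push_to_hub=True,                           # push the model every time it is saved
    hub_model_id="my-user/my-finetuned-model",  # hypothetical; defaults to the name of output_dir
    save_strategy="epoch",                      # each save triggers a push
)
```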
@@ -990,7 +990,7 @@ tokenizer.
 For [camelcase name of model], the tokenizer files can be found here:
 - [To be filled out by mentor]
-and having implemented the 🤗Transformers' version of the tokenizer can be loaded as follows:
+and having implemented the 🤗 Transformers' version of the tokenizer can be loaded as follows:
 [To be filled out by mentor]
......
@@ -2821,7 +2821,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
 self.model = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="model")
 self.model._set_save_spec(inputs=self.serving.input_signature)
 self.use_cache = config.use_cache
-# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
+# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
 self.final_logits_bias = self.add_weight(
     name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
 )
......
@@ -2183,7 +2183,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
 total_length = len(sequence["input_ids"])
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -2193,7 +2195,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
@@ -2097,7 +2097,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
 total_length = len(sequence["input_ids"])
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -2107,7 +2109,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
@@ -281,7 +281,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
 super().check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
 # overwrite from common since TFViTMAEForPretraining outputs loss along with
-# logits and mask indices. loss and mask indicies are not suitable for integration
+# logits and mask indices. loss and mask indices are not suitable for integration
 # with other keras modules.
 def test_compile_tf_model(self):
     config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -278,7 +278,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
 NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"
 model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
 tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
-sentence = """Enzo works at the the UN"""
+sentence = """Enzo works at the UN"""
 token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
 output = token_classifier(sentence)
 self.assertEqual(
......
@@ -990,7 +990,9 @@ class TokenizerTesterMixin:
 sequence = tokenizer.encode(seq_0, add_special_tokens=False)
 total_length = len(sequence)
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -1000,7 +1002,9 @@ class TokenizerTesterMixin:
 sequence1 = tokenizer(seq_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
...@@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt): ...@@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
return "".join(lines[start_index:end_index]), start_index, end_index, lines return "".join(lines[start_index:end_index]), start_index, end_index, lines
# Add here suffixes that are used to identify models, seperated by | # Add here suffixes that are used to identify models, separated by |
ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration" ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
# Regexes that match TF/Flax/PT model names. # Regexes that match TF/Flax/PT model names.
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)") _re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")
......
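As a small illustration (not part of this diff) of how these suffix regexes are meant to work, the capture group picks out the bare model type sitting between the framework prefix and an allowed suffix:

```python
import re

ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")

# The group captures the model type in front of the suffix.
print(_re_tf_models.match("TFBertModel").groups())                   # ('Bert',)
print(_re_tf_models.match("TFT5ForConditionalGeneration").groups())  # ('T5',)
```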