"vscode:/vscode.git/clone" did not exist on "ba56edab19715fe8d1ae75e710a8847944009f43"
Unverified Commit 95113d13 authored by Yulv-git, committed by GitHub

Fix some typos. (#17560)



* Fix some typos.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* Fix typo.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* make fixup.
parent ad28ca29
@@ -231,7 +231,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
- # add offset to the starting indexes so that that indexes now create a span
+ # add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length
......
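The lines above are the span-expansion step of `_compute_mask_indices` (SpecAugment-style masking). A minimal standalone sketch, with made-up shapes and pre-sampled start indexes rather than the full function, of how the broadcast offsets turn each start index into a contiguous masked span:

```python
import numpy as np

# Hypothetical dimensions: 2 sequences, 3 masked spans each, span length 4.
batch_size, max_num_masked_span, mask_length = 2, 3, 4

# Pretend these span start indexes were already sampled upstream.
starts = np.array([[0, 5, 10], [2, 7, 12]])

# Repeat every start index mask_length times, matching the reshape above.
spec_aug_mask_idxs = np.broadcast_to(
    starts[:, :, None], (batch_size, max_num_masked_span, mask_length)
).reshape(batch_size, max_num_masked_span * mask_length)

# Add offsets 0..mask_length-1 so that each start index grows into a span.
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
    batch_size, max_num_masked_span * mask_length
)
spec_aug_mask_idxs = spec_aug_mask_idxs + offsets
print(spec_aug_mask_idxs[0])  # [ 0  1  2  3  5  6  7  8 10 11 12 13] -> three spans of length 4
```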
@@ -82,10 +82,10 @@ class WavLMConfig(PretrainedConfig):
feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
- of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
+ of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
- length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
+ length of *conv_kernel* defines the number of convolutional layers and has to match the length of
*conv_dim*.
conv_bias (`bool`, *optional*, defaults to `False`):
Whether the 1D convolutional layers have a bias.
......
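Both corrected docstring lines state the same constraint: `conv_dim`, `conv_stride` and `conv_kernel` must all have the same length, one entry per convolutional layer of the feature encoder. A minimal sketch using the documented defaults:

```python
from transformers import WavLMConfig

# One entry per 1D convolutional layer: all three tuples must be equally long.
config = WavLMConfig(
    conv_dim=(512, 512, 512, 512, 512, 512, 512),
    conv_stride=(5, 2, 2, 2, 2, 2, 2),
    conv_kernel=(10, 3, 3, 3, 3, 3, 3),
    conv_bias=False,
)
assert len(config.conv_dim) == len(config.conv_stride) == len(config.conv_kernel)
```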
@@ -183,7 +183,7 @@ def _compute_mask_indices(
)
spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
- # add offset to the starting indexes so that that indexes now create a span
+ # add offset to the starting indexes so that indexes now create a span
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
batch_size, max_num_masked_span * mask_length
......
@@ -1069,7 +1069,7 @@ class YolosLoss(nn.Module):
# Retrieve the matching between the outputs of the last layer and the targets
indices = self.matcher(outputs_without_aux, targets)
- # Compute the average number of target boxes accross all nodes, for normalization purposes
+ # Compute the average number of target boxes across all nodes, for normalization purposes
num_boxes = sum(len(t["class_labels"]) for t in targets)
num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
# (Niels): comment out function below, distributed training to be added
......
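For context, the corrected comment describes the normalization term of the detection loss: the total number of target boxes in the batch. A toy sketch of that computation (distributed averaging omitted, as in the commented-out code the diff mentions):

```python
import torch

# Hypothetical targets: one dict per image, as expected by the loss above.
targets = [
    {"class_labels": torch.tensor([1, 3])},        # 2 boxes
    {"class_labels": torch.tensor([0])},           # 1 box
    {"class_labels": torch.tensor([2, 2, 5, 7])},  # 4 boxes
]

num_boxes = sum(len(t["class_labels"]) for t in targets)
num_boxes = torch.as_tensor([num_boxes], dtype=torch.float)
# Clamp so per-box losses are never divided by zero when a batch has no boxes.
num_boxes = torch.clamp(num_boxes, min=1).item()
print(num_boxes)  # 7.0
```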
@@ -487,7 +487,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str):
"""
- Fill the input_or_ouputs mapping with past_key_values dynamic axes considering.
+ Fill the input_or_outputs mapping with past_key_values dynamic axes considering.
Args:
inputs_or_outputs: The mapping to fill.
......
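To make the fixed docstring concrete: the method populates the given mapping with one `key`/`value` entry per layer, each carrying the dynamic axes. A standalone sketch of that behavior, assuming two layers and the usual `past_key_values`/`present` naming; this mirrors the real method for illustration, it is not the library implementation:

```python
from typing import Dict

def fill_with_past_key_values_sketch(
    inputs_or_outputs: Dict[str, Dict[int, str]], direction: str, num_layers: int = 2
) -> None:
    # Inputs are conventionally named past_key_values.*, outputs present.*.
    name = "past_key_values" if direction == "inputs" else "present"
    for i in range(num_layers):
        # Batch and past-sequence axes are dynamic for every key/value tensor.
        inputs_or_outputs[f"{name}.{i}.key"] = {0: "batch", 2: "past_sequence + sequence"}
        inputs_or_outputs[f"{name}.{i}.value"] = {0: "batch", 2: "past_sequence + sequence"}

axes: Dict[str, Dict[int, str]] = {}
fill_with_past_key_values_sketch(axes, direction="inputs")
print(list(axes))  # ['past_key_values.0.key', 'past_key_values.0.value', ...]
```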
@@ -412,8 +412,8 @@ class TrainingArguments:
down the training and evaluation speed.
push_to_hub (`bool`, *optional*, defaults to `False`):
Whether or not to push the model to the Hub every time the model is saved. If this is activated,
- `output_dir` will begin a git directory synced with the the repo (determined by `hub_model_id`) and the
- content will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
+ `output_dir` will begin a git directory synced with the repo (determined by `hub_model_id`) and the content
+ will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
[`~Trainer.save_model`] will also trigger a push.
<Tip warning={true}>
@@ -434,7 +434,7 @@ class TrainingArguments:
`"organization_name/model"`. Will default to `user_name/output_dir_name` with *output_dir_name* being the
name of `output_dir`.
- Will default to to the name of `output_dir`.
+ Will default to the name of `output_dir`.
hub_strategy (`str` or [`~trainer_utils.HubStrategy`], *optional*, defaults to `"every_save"`):
Defines the scope of what is pushed to the Hub and when. Possible values are:
......
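As a usage note for the reworded docstring, enabling the sync looks like this (repo and directory names are hypothetical):

```python
from transformers import TrainingArguments

# output_dir becomes a git clone synced with the Hub repo hub_model_id;
# a push happens on every save, as dictated by save_strategy.
args = TrainingArguments(
    output_dir="my-finetuned-model",
    push_to_hub=True,
    hub_model_id="my-user/my-finetuned-model",
    save_strategy="epoch",
)
```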
@@ -990,7 +990,7 @@ tokenizer.
For [camelcase name of model], the tokenizer files can be found here:
- [To be filled out by mentor]
- and having implemented the 🤗Transformers' version of the tokenizer can be loaded as follows:
+ and having implemented the 🤗 Transformers' version of the tokenizer can be loaded as follows:
[To be filled out by mentor]
......
@@ -2821,7 +2821,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
self.model = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="model")
self.model._set_save_spec(inputs=self.serving.input_signature)
self.use_cache = config.use_cache
- # final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
+ # final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
self.final_logits_bias = self.add_weight(
name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
)
......
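The corrected comment contrasts a PyTorch buffer with its closest Keras equivalent, a non-trainable weight. A minimal side-by-side sketch of the two mechanisms:

```python
import tensorflow as tf
import torch

vocab_size = 8  # toy value

# PyTorch: a buffer is serialized with the module but excluded from parameters().
pt_module = torch.nn.Module()
pt_module.register_buffer("final_logits_bias", torch.zeros(1, vocab_size))

# TensorFlow/Keras: the analogue is a weight created with trainable=False.
tf_layer = tf.keras.layers.Layer()
final_logits_bias = tf_layer.add_weight(
    name="final_logits_bias", shape=[1, vocab_size], initializer="zeros", trainable=False
)

print(len(list(pt_module.parameters())), len(tf_layer.trainable_weights))  # 0 0
```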
@@ -2183,7 +2183,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
total_length = len(sequence["input_ids"])
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@@ -2193,7 +2195,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
-     total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+     total_length1,
+     model_max_length,
+     "Issue with the testing sequence, please update it, it's too short",
)
# Simple
......
@@ -2097,7 +2097,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
total_length = len(sequence["input_ids"])
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@@ -2107,7 +2109,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
-     total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+     total_length1,
+     model_max_length,
+     "Issue with the testing sequence, please update it, it's too short",
)
# Simple
......
@@ -281,7 +281,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
super().check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
# overwrite from common since TFViTMAEForPretraining outputs loss along with
- # logits and mask indices. loss and mask indicies are not suitable for integration
+ # logits and mask indices. loss and mask indices are not suitable for integration
# with other keras modules.
def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -278,7 +278,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
sentence = """Enzo works at the the UN"""
sentence = """Enzo works at the UN"""
token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
output = token_classifier(sentence)
self.assertEqual(
......
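For reference, running the fixed sentence through the pipeline yields one dict per tagged token, roughly as below (the score is illustrative, not an exact value):

```python
from transformers import pipeline

token_classifier = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
print(token_classifier("Enzo works at the UN"))
# [{'entity': 'I-PER', 'score': 0.99, 'index': 1, 'word': 'Enzo', 'start': 0, 'end': 4},
#  {'entity': 'I-ORG', ...}]
```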
@@ -990,7 +990,9 @@ class TokenizerTesterMixin:
sequence = tokenizer.encode(seq_0, add_special_tokens=False)
total_length = len(sequence)
self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
self.assertGreater(
total_length, 4, "Issue with the testing sequence, please update it, it's too short"
)
# Test with max model input length
model_max_length = tokenizer.model_max_length
@@ -1000,7 +1002,9 @@ class TokenizerTesterMixin:
sequence1 = tokenizer(seq_1, add_special_tokens=False)
total_length1 = len(sequence1["input_ids"])
self.assertGreater(
-     total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+     total_length1,
+     model_max_length,
+     "Issue with the testing sequence, please update it, it's too short",
)
# Simple
......
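The reformatted assertions guard the same precondition in all three test files: the probe sequence must exceed `model_max_length` before truncation is exercised. A hedged sketch of that pattern outside the test harness (checkpoint chosen here for illustration):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # model_max_length == 512
seq = "word " * 600  # comfortably longer than model_max_length

ids = tokenizer(seq, add_special_tokens=False)["input_ids"]
assert len(ids) > tokenizer.model_max_length, "Issue with the testing sequence, please update it, it's too short"

# Truncation caps the encoded sequence at exactly max_length tokens.
truncated = tokenizer(seq, truncation=True, max_length=tokenizer.model_max_length)
assert len(truncated["input_ids"]) == tokenizer.model_max_length
```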
@@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
return "".join(lines[start_index:end_index]), start_index, end_index, lines
- # Add here suffixes that are used to identify models, seperated by |
+ # Add here suffixes that are used to identify models, separated by |
ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
# Regexes that match TF/Flax/PT model names.
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")
......