Unverified Commit 969859d5 authored by Santiago Castro, committed by GitHub

Fix doc errors and typos across the board (#8139)

* Fix doc errors and typos across the board

* Fix a typo

* Fix the CI

* Fix more typos

* Fix CI

* More fixes

* Fix CI

* More fixes

* More fixes
parent 4731a00c
@@ -53,7 +53,7 @@ TOKENIZER_FILE = "tokenizer.json"
 SPECIAL_TOKENS_MAP_FILE = "special_tokens_map.json"
 TOKENIZER_CONFIG_FILE = "tokenizer_config.json"
-# Slow tokenizers have an additional addedd tokens files
+# Slow tokenizers have an additional added tokens files
 ADDED_TOKENS_FILE = "added_tokens.json"
@@ -211,7 +211,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         vocabulary.
         Args:
-            token (:obj:`str` or :obj:`List[str]`): One or several token(s) to convert to token id(s).
+            tokens (:obj:`str` or :obj:`List[str]`): One or several token(s) to convert to token id(s).
         Returns:
             :obj:`int` or :obj:`List[int]`: The token id or list of token ids.
@@ -473,7 +473,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         )
         # Return tensor is None, then we can remove the leading batch axis
-        # Overfolwing tokens are returned as a batch of output so we keep them in this case
+        # Overflowing tokens are returned as a batch of output so we keep them in this case
         if return_tensors is None and not return_overflowing_tokens:
             batched_output = BatchEncoding(
                 {
......
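For context on the `tokens` argument fixed in this file's docstring above, `convert_tokens_to_ids` accepts either a single token or a list of tokens and returns an id or a list of ids accordingly. A small usage sketch (the checkpoint name is just an example, not part of this change):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    # A single token yields a single id, a list of tokens yields a list of ids.
    single_id = tokenizer.convert_tokens_to_ids("hello")
    many_ids = tokenizer.convert_tokens_to_ids(["hello", "world"])
    print(single_id, many_ids)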
@@ -909,7 +909,7 @@ class XLMTokenizer(PreTrainedTokenizer):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(
                 map(
......
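The error message corrected here (and repeated in the tokenizers below) belongs to `get_special_tokens_mask`: with `already_has_special_tokens=True` only a single sequence may be passed, and the mask marks special-token positions with 1 and ordinary tokens with 0. A rough usage sketch (checkpoint name assumed, output shown only as an illustration):

    from transformers import XLMTokenizer

    tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-en-2048")
    ids = tokenizer.encode("hello world")  # encode() already adds the special tokens
    # 1 for <s>/</s> positions, 0 for the word tokens in between.
    mask = tokenizer.get_special_tokens_mask(ids, already_has_special_tokens=True)
    print(mask)  # e.g. [1, 0, 0, 1]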
@@ -202,7 +202,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -205,7 +205,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -194,7 +194,7 @@ class XLMRobertaTokenizerFast(PreTrainedTokenizerFast):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -270,7 +270,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -209,7 +209,7 @@ class XLNetTokenizerFast(PreTrainedTokenizerFast):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -1524,8 +1524,6 @@ class Trainer:
         method in the model or subclass and override this method.
         Args:
-            model (:obj:`nn.Module`):
-                The model to evaluate.
             inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
                 The inputs and targets of the model.
......
@@ -135,7 +135,7 @@ def torch_distributed_zero_first(local_rank: int):
 class SequentialDistributedSampler(Sampler):
     """
-    Distributed Sampler that subsamples indicies sequentially, making it easier to collate all results at the end.
+    Distributed Sampler that subsamples indices sequentially, making it easier to collate all results at the end.
     Even though we only use this sampler for eval and predict (no training), which means that the model params won't
     have to be synced (i.e. will not hang for synchronization even if varied number of forward passes), we still add
......
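The idea described in that docstring, sketched schematically (this is not the sampler's actual code, just the principle): each rank takes one contiguous slice of the dataset indices, so per-rank predictions can simply be concatenated back in rank order afterwards.

    import math

    def sequential_shard(num_samples: int, num_replicas: int, rank: int) -> range:
        # Rank 0 gets the first contiguous block of indices, rank 1 the next
        # one, and so on, which keeps the collated results in dataset order.
        per_replica = math.ceil(num_samples / num_replicas)
        start = rank * per_replica
        return range(start, min(start + per_replica, num_samples))

    print(list(sequential_shard(10, 3, 0)))  # [0, 1, 2, 3]
    print(list(sequential_shard(10, 3, 2)))  # [8, 9]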
@@ -402,7 +402,7 @@ class TrainingArguments:
             n_gpu = torch.cuda.device_count()
         else:
             # Here, we'll use torch.distributed.
-            # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
+            # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
             torch.distributed.init_process_group(backend="nccl")
             device = torch.device("cuda", self.local_rank)
             n_gpu = 1
......
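For reference, the branch touched above only runs once a distributed launcher has assigned the process a local rank; a hedged standalone sketch of that setup path (reading LOCAL_RANK from the environment is an assumption about the launcher, not something this diff shows):

    import os
    import torch

    local_rank = int(os.environ.get("LOCAL_RANK", -1))
    if local_rank != -1 and torch.cuda.is_available():
        # One process per GPU: bind this process to its own device.
        torch.distributed.init_process_group(backend="nccl")
        device = torch.device("cuda", local_rank)
        n_gpu = 1
    else:
        # Single-process fallback: use all visible GPUs (or the CPU).
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()
    print(device, n_gpu)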
@@ -205,7 +205,7 @@ class NotebookTrainingTracker(NotebookProgressBar):
             num_steps (:obj:`int`): The number of steps during training.
             column_names (:obj:`List[str]`, `optional`):
-                The list of column names for the metrics table (will be infered from the first call to
+                The list of column names for the metrics table (will be inferred from the first call to
                 :meth:`~transformers.utils.notebook.NotebookTrainingTracker.write_line` if not set).
         """
@@ -246,7 +246,7 @@ class NotebookTrainingTracker(NotebookProgressBar):
     def add_child(self, total, prefix=None, width=300):
         """
-        Add a child progress bar disaplyed under the table of metrics. The child progress bar is returned (so it can be
+        Add a child progress bar displayed under the table of metrics. The child progress bar is returned (so it can be
         easily updated).
         Args:
......
@@ -45,7 +45,7 @@ from utils_squad import (
     write_predictions_extended,
 )
-# The follwing import is the official SQuAD evaluation script (2.0).
+# The following import is the official SQuAD evaluation script (2.0).
 # You can remove it from the dependencies if you are using this script outside of the library
 # We've added it here for automated tests (see examples/test_examples.py file)
 from utils_squad_evaluate import EVAL_OPTS
......
@@ -426,8 +426,8 @@ def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_ans
     #
     # However, this is not always possible. Consider the following:
     #
-    #   Question: What country is the top exporter of electornics?
-    #   Context: The Japanese electronics industry is the lagest in the world.
+    #   Question: What country is the top exporter of electronics?
+    #   Context: The Japanese electronics industry is the largest in the world.
     #   Answer: Japan
     #
     # In this case, the annotator chose "Japan" as a character sub-span of
......
@@ -57,7 +57,7 @@ class XxxConfig(PretrainedConfig):
             If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
......
@@ -352,7 +352,7 @@ XXX_INPUTS_DOCSTRING = r"""
 @add_start_docstrings(
-    "The bare XXX Model transformer outputing raw hidden-states without any specific head on top.",
+    "The bare XXX Model transformer outputting raw hidden-states without any specific head on top.",
     XXX_START_DOCSTRING,
 )
 class TFXxxModel(TFXxxPreTrainedModel):
......
@@ -237,7 +237,7 @@ class XxxTokenizer(PreTrainedTokenizer):
             if token_ids_1 is not None:
                 raise ValueError(
                     "You should not supply a second sequence if the provided sequence of "
-                    "ids is already formated with special tokens for the model."
+                    "ids is already formatted with special tokens for the model."
                 )
             return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
......
@@ -34,7 +34,7 @@ class HfArgumentParserTest(unittest.TestCase):
         logger = logging.get_logger("transformers.tokenization_bart")
         msg = "Testing 1, 2, 3"
-        # should be able to log warnings (if default settings weren't overriden by `pytest --log-level-all`)
+        # should be able to log warnings (if default settings weren't overridden by `pytest --log-level-all`)
         if level_origin <= logging.WARNING:
             with CaptureLogger(logger) as cl:
                 logger.warn(msg)
......
@@ -1130,7 +1130,7 @@ class UtilsFunctionsTest(unittest.TestCase):
                     2.12662941,
                     -9.32562038,
                     2.35652522,
-                ],  # cummulative prob of 5 highest values <= 0.6
+                ],  # cumulative prob of 5 highest values <= 0.6
                 [
                     0.58425518,
                     4.53139238,
@@ -1162,7 +1162,7 @@ class UtilsFunctionsTest(unittest.TestCase):
                     9.67702323,  # 1st highest value; idx. 27
                     -5.89478553,
                     1.85370467,
-                ],  # cummulative prob of 5 highest values <= 0.6
+                ],  # cumulative prob of 5 highest values <= 0.6
             ],
             dtype=torch.float,
             device=torch_device,
......
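The "cumulative prob of 5 highest values <= 0.6" comments fixed above refer to nucleus (top-p) filtering. As a rough reminder of that technique (a generic sketch, not the library's own helper), the masking step usually sorts the logits, accumulates probability mass, and drops everything past the top_p threshold:

    import torch
    import torch.nn.functional as F

    def top_p_mask(logits: torch.Tensor, top_p: float = 0.6) -> torch.Tensor:
        # Sort logits descending and accumulate probability mass.
        sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
        cumulative = F.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
        # Drop every token that comes after the cumulative mass exceeds top_p,
        # always keeping at least the single most likely token.
        remove = cumulative > top_p
        remove[..., 1:] = remove[..., :-1].clone()
        remove[..., 0] = False
        to_remove = remove.scatter(-1, sorted_indices, remove)
        return logits.masked_fill(to_remove, float("-inf"))

    print(top_p_mask(torch.tensor([[2.0, 1.0, 0.5, -3.0]])))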
@@ -497,7 +497,7 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
                 return_obj_labels="PreTraining" in model_class.__name__
             )
-            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beggining
+            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
             pt_model_class = getattr(transformers, pt_model_class_name)
             config.output_hidden_states = True
......
@@ -100,7 +100,7 @@ def is_copy_consistent(filename, overwrite=False):
         lines = f.readlines()
     diffs = []
     line_index = 0
-    # Not a foor loop cause `lines` is going to change (if `overwrite=True`).
+    # Not a for loop cause `lines` is going to change (if `overwrite=True`).
     while line_index < len(lines):
         search = _re_copy_warning.search(lines[line_index])
         if search is None:
@@ -164,9 +164,9 @@ def check_copies(overwrite: bool = False):
     if not overwrite and len(diffs) > 0:
         diff = "\n".join(diffs)
         raise Exception(
-            "Found the follwing copy inconsistencies:\n"
+            "Found the following copy inconsistencies:\n"
             + diff
-            + "\nRun `make fix-copies` or `python utils/check_copies --fix_and_overwrite` to fix them."
+            + "\nRun `make fix-copies` or `python utils/check_copies.py --fix_and_overwrite` to fix them."
         )
     check_model_list_copy(overwrite=overwrite)
......
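A small aside on the "not a for loop" comment corrected above: when `overwrite=True` the list being scanned is edited while it is being walked, so an index-based while loop re-reads the current length on every iteration. A toy illustration of that pattern (not the real check_copies code):

    lines = ["header", "# Copy marker", "stale line", "footer"]
    line_index = 0
    while line_index < len(lines):
        if lines[line_index] == "# Copy marker":
            # Splice in a regenerated block; the list grows, but the while
            # condition re-reads len(lines), so the scan stays correct.
            lines[line_index + 1 : line_index + 2] = ["fresh line 1", "fresh line 2"]
            line_index += 2  # jump past the block we just rewrote
        line_index += 1
    print(lines)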