Unverified Commit b24ead87 authored by LSinev, committed by GitHub

fix some typos in docs, comments, logging/errors (#11432)

parent e3e70f95
@@ -2021,7 +2021,7 @@ def _calculate_aggregate_mask(answer, pooled_output, cell_selection_preference,
apply to numbers. If the answer is a number but does not appear in the table then we must use some aggregation
case. The ambiguous case is when the answer is a number that also appears in the table. In this case we use the
aggregation function probabilities predicted by the model to decide whether to select or aggregate. The threshold
-for this is a hyperparameter `cell_selection_preference
+for this is a hyperparameter `cell_selection_preference`
Args:
answer (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, )`):
@@ -2041,7 +2041,7 @@ def _calculate_aggregate_mask(answer, pooled_output, cell_selection_preference,
aggregate_mask_init = torch.logical_not(torch.isnan(answer)).type(torch.FloatTensor).to(answer.device)
logits_aggregation = aggregation_classifier(pooled_output)
dist_aggregation = torch.distributions.categorical.Categorical(logits=logits_aggregation)
-# Index 0 correponds to "no aggregation".
+# Index 0 corresponds to "no aggregation".
aggregation_ops_total_mass = torch.sum(dist_aggregation.probs[:, 1:], dim=1)
# Cell selection examples according to current model.
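The `cell_selection_preference` threshold fixed above is what resolves the ambiguous case the docstring describes. A minimal sketch with made-up logits, mirroring the shape of the code in this hunk but not the exact library implementation:

```python
import torch

# Illustrative inputs only; in the model, logits_aggregation comes from the aggregation classifier.
cell_selection_preference = 0.5                      # the hyperparameter from the docstring
answer = torch.tensor([2.0, float("nan"), 7.0])      # NaN means "no scalar answer"
logits_aggregation = torch.tensor(
    [[2.0, 0.1, 0.1, 0.1],   # mass concentrated on index 0 ("no aggregation")
     [0.1, 2.0, 0.1, 0.1],
     [0.1, 0.1, 2.0, 0.1]]
)

# Examples whose answer is a number are candidates for aggregation.
aggregate_mask_init = torch.logical_not(torch.isnan(answer)).float()

# Index 0 corresponds to "no aggregation"; sum the probability mass of the remaining ops.
probs = torch.distributions.Categorical(logits=logits_aggregation).probs
aggregation_ops_total_mass = probs[:, 1:].sum(dim=1)

# Where the model itself prefers cell selection, drop the example from the mask.
prefers_selection = aggregation_ops_total_mass <= cell_selection_preference
aggregate_mask = torch.where(prefers_selection, torch.zeros_like(aggregate_mask_init), aggregate_mask_init)
print(aggregate_mask)  # tensor([0., 0., 1.])
```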
@@ -2126,7 +2126,7 @@ def _calculate_aggregation_loss_unknown(logits_aggregation, aggregate_mask):
answer supervision) per example.
"""
dist_aggregation = torch.distributions.categorical.Categorical(logits=logits_aggregation)
-# Index 0 correponds to "no aggregation".
+# Index 0 corresponds to "no aggregation".
aggregation_ops_total_mass = torch.sum(dist_aggregation.probs[:, 1:], dim=1)
# Predict some aggregation in case of an answer that needs aggregation.
# This increases the probability of all aggregation functions, in a way
......
@@ -2357,7 +2357,7 @@ def _get_numeric_value_from_date(date, mask):
def _get_span_length_key(span):
-"""Sorts span by decreasing length first and incresing first index second."""
+"""Sorts span by decreasing length first and increasing first index second."""
return span[1] - span[0], -span[0]
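The sort key fixed above is easier to read next to the order it produces; a small sketch, assuming the helper is consumed with `sorted(..., reverse=True)` (that call is not shown in this hunk):

```python
# Standalone copy of the helper for illustration.
def span_length_key(span):
    return span[1] - span[0], -span[0]

spans = [(3, 5), (0, 4), (2, 6)]
# Decreasing length first; ties broken by the smaller (increasing) first index.
print(sorted(spans, key=span_length_key, reverse=True))  # [(0, 4), (2, 6), (3, 5)]
```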
......
@@ -292,7 +292,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
elif "<unk>" in self.sym2idx:
self.unk_idx = self.sym2idx["<unk>"]
else:
-raise ValueError("No <unkown> token in vocabulary")
+raise ValueError("No <unknown> token in vocabulary")
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
if os.path.isdir(save_directory):
......
@@ -382,9 +382,9 @@ class Wav2Vec2Attention(nn.Module):
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
if output_attentions:
-# this operation is a bit akward, but it's required to
+# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
-# In order to do so, attn_weights have to reshaped
+# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
......
@@ -111,7 +111,7 @@ class Wav2Vec2Processor:
When used in normal mode, this method forwards all its arguments to Wav2Vec2FeatureExtractor's
:meth:`~transformers.Wav2Vec2FeatureExtractor.__call__` and returns its output. If used in the context
:meth:`~transformers.Wav2Vec2Processor.as_target_processor` this method forwards all its arguments to
-Wav2Vec2CTCTokenizer's :meth:`~transformers.Wav2Vec2CTCTokenizer.__call__`. Please refer to the doctsring of
+Wav2Vec2CTCTokenizer's :meth:`~transformers.Wav2Vec2CTCTokenizer.__call__`. Please refer to the docstring of
the above two methods for more information.
"""
return self.current_processor(*args, **kwargs)
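A short usage sketch of the dispatch that docstring describes; the checkpoint name and inputs are only illustrative:

```python
from transformers import Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")

raw_speech = [0.0] * 16_000                                                # one second of dummy audio at 16 kHz
inputs = processor(raw_speech, sampling_rate=16_000, return_tensors="pt")  # feature-extractor path

with processor.as_target_processor():                                      # tokenizer path for the labels
    labels = processor("HELLO WORLD", return_tensors="pt").input_ids
```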
......
@@ -348,7 +348,7 @@ XLM_INPUTS_DOCSTRING = r"""
A parallel sequence of tokens to be used to indicate the language of each token in the input. Indices are
languages ids which can be obtained from the language names by using two conversion mappings provided in
the configuration of the model (only provided for multilingual models). More precisely, the `language name
-to language id` mapping is in :obj:`model.config.lang2id` (which is a dictionary strring to int) and the
+to language id` mapping is in :obj:`model.config.lang2id` (which is a dictionary string to int) and the
`language id to language name` mapping is in :obj:`model.config.id2lang` (dictionary int to string).
See usage examples detailed in the :doc:`multilingual documentation <../multilingual>`.
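The two mappings named above are what you use to build the `langs` tensor for a multilingual checkpoint; a hedged sketch (checkpoint name taken from the multilingual docs, inputs are illustrative):

```python
import torch
from transformers import XLMTokenizer, XLMWithLMHeadModel

tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-xnli15-1024")
model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-xnli15-1024")

input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")])
lang_id = model.config.lang2id["en"]          # language name -> language id (str to int)
langs = torch.full_like(input_ids, lang_id)   # one language id per token, same shape as input_ids

outputs = model(input_ids, langs=langs)
print(model.config.id2lang[lang_id])          # language id -> language name, prints "en"
```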
@@ -1188,7 +1188,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
self.init_weights()
-@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choicec, sequence_length"))
+@add_start_docstrings_to_model_forward(XLM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@add_code_sample_docstrings(
tokenizer_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
......
@@ -749,8 +749,8 @@ class XLMTokenizer(PreTrainedTokenizer):
def _tokenize(self, text, lang="en", bypass_tokenizer=False):
"""
-Tokenize a string given language code. For Chinese, Japanese and Thai, we use a language specific
-tokenizerself. Otherwise, we use Moses.
+Tokenize a string given language code. For Chinese, Japanese and Thai, we use a language specific tokenizer.
+Otherwise, we use Moses.
Details of tokenization:
......
@@ -1113,7 +1113,7 @@ XLNET_INPUTS_DOCSTRING = r"""
Mask values selected in ``[0, 1]``:
- 1 for tokens that are **masked**,
-- 0 for tokens that are **not maked**.
+- 0 for tokens that are **not masked**.
You can only uses one of :obj:`input_mask` and :obj:`attention_mask`.
head_mask (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
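Since `input_mask` uses the opposite convention of `attention_mask` (1 marks a token to ignore rather than a token to attend to), converting between the two is a one-liner; a tiny sketch with a dummy batch:

```python
import numpy as np

attention_mask = np.array([[1, 1, 1, 0, 0]])  # 1 = real token, 0 = padding
input_mask = 1 - attention_mask               # 1 = masked, 0 = not masked; pass only one of the two
```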
......
@@ -182,7 +182,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
name (:obj:`str`, `optional`, defaults to 'AdamWeightDecay'):
Optional name for the operations created when applying gradients.
kwargs:
-Keyward arguments. Allowed to be {``clipnorm``, ``clipvalue``, ``lr``, ``decay``}. ``clipnorm`` is clip
+Keyword arguments. Allowed to be {``clipnorm``, ``clipvalue``, ``lr``, ``decay``}. ``clipnorm`` is clip
gradients by norm; ``clipvalue`` is clip gradients by value, ``decay`` is included for backward
compatibility to allow time inverse decay of learning rate. ``lr`` is included for backward compatibility,
recommended to use ``learning_rate`` instead.
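A hedged usage sketch of those kwargs (requires TensorFlow; the hyperparameter values are placeholders):

```python
from transformers import AdamWeightDecay

optimizer = AdamWeightDecay(
    learning_rate=3e-5,        # preferred over the legacy ``lr`` alias
    weight_decay_rate=0.01,
    clipnorm=1.0,              # forwarded to the underlying Keras Adam: clip gradients by norm
)
```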
......
@@ -128,7 +128,7 @@ class Conversation:
"""
Iterates over all blobs of the conversation.
-Retuns: Iterator of (is_user, text_chunk) in chronological order of the conversation. ``is_user`` is a
+Returns: Iterator of (is_user, text_chunk) in chronological order of the conversation. ``is_user`` is a
:obj:`bool`, ``text_chunks`` is a :obj:`str`.
"""
for user_input, generated_response in zip(self.past_user_inputs, self.generated_responses):
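A short sketch of consuming that iterator, assuming the usual `mark_processed`/`append_response` helpers are used to fill the conversation first:

```python
from transformers import Conversation

conversation = Conversation("Is it going to rain today?")
conversation.mark_processed()                                       # move the input into past_user_inputs
conversation.append_response("It should stay dry all afternoon.")

for is_user, text in conversation.iter_texts():
    print(("user" if is_user else "bot") + " >> " + text)
```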
......
@@ -48,7 +48,7 @@ class Text2TextGenerationPipeline(Pipeline):
def check_inputs(self, input_length: int, min_length: int, max_length: int):
"""
-Checks wether there might be something wrong with given input with regard to the model.
+Checks whether there might be something wrong with given input with regard to the model.
"""
return True
@@ -204,7 +204,7 @@ class SummarizationPipeline(Text2TextGenerationPipeline):
def check_inputs(self, input_length: int, min_length: int, max_length: int) -> bool:
"""
-Checks wether there might be something wrong with given input with regard to the model.
+Checks whether there might be something wrong with given input with regard to the model.
"""
if input_length < min_length // 2:
logger.warning(
......
@@ -520,7 +520,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
filename_prefix: Optional[str] = None,
) -> Tuple[str]:
"""
-Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens as well asin a unique JSON
+Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens as well as in a unique JSON
file containing {config + vocab + added-tokens}.
"""
save_directory = str(save_directory)
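A hedged sketch of requesting that legacy layout through the public API, assuming the `legacy_format` flag of `save_pretrained` rather than calling this private method directly:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Writes the vocabulary and added tokens in the slow-tokenizer layout described above.
tokenizer.save_pretrained("./my-tokenizer", legacy_format=True)
```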
......
@@ -208,7 +208,7 @@ class Trainer:
Note that if it's a :obj:`torch.utils.data.dataset.IterableDataset` with some randomization and you are
training in a distributed fashion, your iterable dataset should either use a internal attribute
-:obj:`generator` that is a :obj:`torch.Generator` for the randomization that must be identic on all
+:obj:`generator` that is a :obj:`torch.Generator` for the randomization that must be identical on all
processes (and the Trainer will manually set the seed of this :obj:`generator` at each epoch) or have a
:obj:`set_epoch()` method that internally sets the seed of the RNGs used.
eval_dataset (:obj:`torch.utils.data.dataset.Dataset`, `optional`):
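A minimal sketch of an iterable dataset that satisfies that contract via a `set_epoch()` method (the alternative being a `generator` attribute that the Trainer re-seeds itself); everything here is illustrative:

```python
import torch
from torch.utils.data import IterableDataset

class ShuffledStream(IterableDataset):
    """Toy iterable dataset whose shuffling can be re-seeded identically on every process."""

    def __init__(self, data, base_seed: int = 42):
        self.data = list(data)
        self.base_seed = base_seed
        self.generator = torch.Generator()

    def set_epoch(self, epoch: int):
        # Same seed on every process => same shuffling order everywhere.
        self.generator.manual_seed(self.base_seed + epoch)

    def __iter__(self):
        for i in torch.randperm(len(self.data), generator=self.generator):
            yield self.data[int(i)]
```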
......
@@ -42,7 +42,7 @@ class TrainerState:
In all this class, one step is to be understood as one update step. When using gradient accumulation, one
update step may require several forward and backward passes: if you use :obj:`gradient_accumulation_steps=n`,
-then one update step requires going throuch `n` batches.
+then one update step requires going through `n` batches.
Args:
epoch (:obj:`float`, `optional`):
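For example, with a per-device batch size of 8 and `gradient_accumulation_steps=4`, one update step runs 4 forward/backward passes and consumes 4 batches (32 examples per device) before the optimizer steps once.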
......
@@ -243,7 +243,7 @@ class SequentialDistributedSampler(Sampler):
def __init__(self, dataset, num_replicas=None, rank=None, batch_size=None):
warnings.warn(
-"SequentialDistributedSampler is deprecated and will be removed in v5 of Tranformers.",
+"SequentialDistributedSampler is deprecated and will be removed in v5 of Transformers.",
FutureWarning,
)
if num_replicas is None:
@@ -363,7 +363,7 @@ class DistributedTensorGatherer:
def __init__(self, world_size, num_samples, make_multiple_of=None, padding_index=-100):
warnings.warn(
-"DistributedTensorGatherer is deprecated and will be removed in v5 of Tranformers.",
+"DistributedTensorGatherer is deprecated and will be removed in v5 of Transformers.",
FutureWarning,
)
self.world_size = world_size
......
@@ -199,7 +199,7 @@ class Seq2SeqTrainer(Trainer):
def _pad_tensors_to_max_len(self, tensor, max_length):
if self.tokenizer is None:
raise ValueError(
-f"Tensor need to be padded to `max_length={max_length}` but no tokenzier was passed when creating "
+f"Tensor need to be padded to `max_length={max_length}` but no tokenizer was passed when creating "
"this `Trainer`. Make sure to create your `Trainer` with the appropriate tokenizer."
)
# If PAD token is not defined at least EOS token has to be defined
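A hedged sketch of what that padding helper does once a pad (or, failing that, EOS) token id is available; the function name and values below are illustrative only:

```python
import torch

def pad_to_max_len(tensor: torch.Tensor, max_length: int, pad_token_id: int) -> torch.Tensor:
    # Allocate a (batch, max_length) tensor full of the pad id, then copy the real tokens in.
    padded = torch.full((tensor.shape[0], max_length), pad_token_id, dtype=tensor.dtype, device=tensor.device)
    padded[:, : tensor.shape[-1]] = tensor
    return padded

print(pad_to_max_len(torch.tensor([[5, 6, 7]]), max_length=5, pad_token_id=0))  # tensor([[5, 6, 7, 0, 0]])
```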
......
@@ -140,7 +140,7 @@ def get_verbosity() -> int:
def set_verbosity(verbosity: int) -> None:
"""
-Set the vebosity level for the 🤗 Transformers's root logger.
+Set the verbosity level for the 🤗 Transformers's root logger.
Args:
verbosity (:obj:`int`):
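A short usage sketch of the verbosity API this hunk documents:

```python
import transformers

transformers.logging.set_verbosity(transformers.logging.INFO)  # set an explicit level
transformers.logging.set_verbosity_error()                     # convenience wrapper: only errors are shown
```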
......