Unverified commit b24ead87 authored by LSinev, committed by GitHub

fix some typos in docs, comments, logging/errors (#11432)

parent e3e70f95
@@ -57,14 +57,14 @@ class AddNewModelCommand(BaseTransformersCLICommand):
if not _has_cookiecutter:
raise ImportError(
"Model creation dependencies are required to use the `add_new_model` command. Install them by running "
"the folowing at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
"the following at the root of your `transformers` clone:\n\n\t$ pip install -e .[modelcreation]\n"
)
# Ensure that there is no other `cookiecutter-template-xxx` directory in the current working directory
directories = [directory for directory in os.listdir() if "cookiecutter-template-" == directory[:22]]
if len(directories) > 0:
raise ValueError(
"Several directories starting with `cookiecutter-template-` in current working directory. "
"Please clean your directory by removing all folders startign with `cookiecutter-template-` or "
"Please clean your directory by removing all folders starting with `cookiecutter-template-` or "
"change your working directory."
)
@@ -244,7 +244,7 @@ def squad_convert_example_to_features(
cls_index = span["input_ids"].index(tokenizer.cls_token_id)
# p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
-# Original TF implem also keep the classification token (set to 0)
+# Original TF implementation also keep the classification token (set to 0)
p_mask = np.ones_like(span["token_type_ids"])
if tokenizer.padding_side == "right":
p_mask[len(truncated_query) + sequence_added_tokens :] = 0
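
As context for the comment fixed above, here is a minimal sketch of the p_mask logic with made-up inputs (the token_type_ids, query token ids, and sequence_added_tokens below are illustrative, not taken from the real feature converter): 1 marks tokens that cannot contain the answer, 0 marks candidate tokens.

import numpy as np

token_type_ids = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])  # hypothetical query + context encoding
truncated_query = [101, 2054, 2003]                      # hypothetical query token ids
sequence_added_tokens = 1                                # hypothetical special tokens before the context

p_mask = np.ones_like(token_type_ids)
# padding_side == "right": everything after the query span belongs to the context, so it may hold the answer
p_mask[len(truncated_query) + sequence_added_tokens :] = 0
print(p_mask)  # [1 1 1 1 0 0 0 0 0]
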
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
-Sequence feature extraction class for common feature extrcactors to preprocess sequences.
+Sequence feature extraction class for common feature extractors to preprocess sequences.
"""
from typing import Dict, List, Optional, Union
@@ -551,7 +551,7 @@ BACKENDS_MAPPING = OrderedDict(
("sklearn", (is_sklearn_available, SKLEARN_IMPORT_ERROR)),
("speech", (is_speech_available, SPEECH_IMPORT_ERROR)),
("tf", (is_tf_available, TENSORFLOW_IMPORT_ERROR)),
("tokenziers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)),
("torch", (is_torch_available, PYTORCH_IMPORT_ERROR)),
("vision", (is_vision_available, VISION_IMPORT_ERROR)),
]
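
For readers unfamiliar with this table, a hedged sketch of how a name -> (availability check, error template) mapping like BACKENDS_MAPPING is typically consumed; check_backends and the one-entry mapping below are hypothetical stand-ins, not the library's actual helper.

from collections import OrderedDict

def is_tokenizers_available():
    try:
        import tokenizers  # noqa: F401
        return True
    except ImportError:
        return False

TOKENIZERS_IMPORT_ERROR = "{0} requires the tokenizers library: pip install tokenizers"

backends_mapping = OrderedDict([("tokenizers", (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR))])

def check_backends(obj_name, backends):
    # Fail fast with a readable ImportError when an optional backend is missing.
    for backend in backends:
        available, error_template = backends_mapping[backend]
        if not available():
            raise ImportError(error_template.format(obj_name))

check_backends("BertTokenizerFast", ["tokenizers"])  # raises only if tokenizers is not installed
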
@@ -446,7 +446,7 @@ class NoBadWordsLogitsProcessor(LogitsProcessor):
class PrefixConstrainedLogitsProcessor(LogitsProcessor):
r"""
-:class:`transformers.LogitsProcessor` that enforces contrained generation and is useful for prefix-conditioned
+:class:`transformers.LogitsProcessor` that enforces constrained generation and is useful for prefix-conditioned
constrained generation. See `Autoregressive Entity Retrieval <https://arxiv.org/abs/2010.00904>`__ for more
information.
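
PrefixConstrainedLogitsProcessor is usually reached through the prefix_allowed_tokens_fn argument of generate(); a hedged usage sketch follows, with an illustrative model choice and a deliberately toy constraint.

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def allowed_tokens(batch_id, input_ids):
    # Toy constraint: only allow the first 1000 vocabulary ids at every step.
    return list(range(1000))

input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
output = model.generate(
    input_ids,
    max_length=20,
    num_beams=3,
    prefix_allowed_tokens_fn=allowed_tokens,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
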
@@ -23,7 +23,7 @@ STOPPING_CRITERIA_INPUTS_DOCSTRING = r"""
Prediction scores of a language modeling head. These can be scores for each vocabulary token before SoftMax
or scores for each vocabulary token after SoftMax.
kwargs:
-Additional stopping critera specific kwargs.
+Additional stopping criteria specific kwargs.
Return:
:obj:`bool`. :obj:`False` indicates we should continue, :obj:`True` indicates we should stop.
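
The contract documented above (return False to keep generating, True to stop) is what custom criteria implement; a minimal, hypothetical length-based example:

import torch
from transformers import StoppingCriteria

class ToyMaxLengthCriteria(StoppingCriteria):
    """Hypothetical criterion: stop once the sequence reaches max_length tokens."""

    def __init__(self, max_length: int):
        self.max_length = max_length

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # False -> continue generating, True -> stop, exactly as documented above.
        return input_ids.shape[-1] >= self.max_length
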
@@ -442,8 +442,8 @@ class TFGenerationMixin:
**kwargs
):
"""
-Generate sequences for each example without beam search (num_beams == 1). All returned sequence are generated
-independantly.
+Generate sequences for each example without beam search (num_beams == 1). All returned sequences are generated
+independently.
"""
# length of generated sentences / unfinished sentences
@@ -821,7 +821,7 @@ class GenerationMixin:
... "at least two people were killed in a suspected bomb attack on a passenger bus "
... "in the strife-torn southern philippines on monday , the military said."
... )
->>> # encode input contex
+>>> # encode input context
>>> input_ids = tokenizer(document, return_tensors="pt").input_ids
>>> # generate 3 independent sequences using beam search decoding (5 beams)
>>> # with T5 encoder-decoder model conditioned on short news article.
@@ -94,7 +94,7 @@ class FlaxPreTrainedModel(PushToHubMixin):
self.key = PRNGKey(seed)
self.dtype = dtype
-# randomely initialized parameters
+# randomly initialized parameters
random_params = self.init_weights(self.key, input_shape)
# save required_params as set
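
The snippet above derives a PRNGKey from seed and hands it to init_weights; a hedged, self-contained sketch of that pattern with a toy init function (names and shapes are illustrative):

import jax
import jax.numpy as jnp
from jax.random import PRNGKey

def toy_init_weights(key, input_shape, hidden=4):
    # Stand-in for a model's init_weights: fully determined by the key.
    kernel_key, _ = jax.random.split(key)
    return {
        "kernel": jax.random.normal(kernel_key, (input_shape[-1], hidden)),
        "bias": jnp.zeros((hidden,)),
    }

key = PRNGKey(0)  # same seed -> same randomly initialized parameters
params = toy_init_weights(key, input_shape=(1, 8))
print(params["kernel"].shape, params["bias"].shape)  # (8, 4) (4,)
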
@@ -343,7 +343,7 @@ class CausalLMOutputWithPast(ModelOutput):
Language modeling loss (for next-token prediction).
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
-past_key_values (:obj:`tuple(tupel(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
+past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
@@ -423,7 +423,7 @@ class SequenceClassifierOutputWithPast(ModelOutput):
Classification (or regression if config.num_labels==1) loss.
logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
Classification (or regression if config.num_labels==1) scores (before SoftMax).
-past_key_values (:obj:`tuple(tupel(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
+past_key_values (:obj:`tuple(tuple(torch.FloatTensor))`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
Tuple of :obj:`tuple(torch.FloatTensor)` of length :obj:`config.n_layers`, with each tuple having 2 tensors
of shape :obj:`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
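
Both docstrings above describe the same cache layout; a minimal sketch with made-up sizes that reproduces the documented shapes:

import torch

batch_size, num_heads, seq_len, head_dim, n_layers = 2, 12, 5, 64, 6  # illustrative sizes

past_key_values = tuple(
    (
        torch.zeros(batch_size, num_heads, seq_len, head_dim),  # cached keys for one layer
        torch.zeros(batch_size, num_heads, seq_len, head_dim),  # cached values for one layer
    )
    for _ in range(n_layers)
)

assert len(past_key_values) == n_layers  # one (key, value) pair per layer
assert past_key_values[0][0].shape == (batch_size, num_heads, seq_len, head_dim)
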
@@ -51,7 +51,7 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="")
) # '_._' is replaced by a level separation (can be used to convert TF2.0 lists in PyTorch nn.ModulesList)
tf_name = re.sub(r"//+", "/", tf_name) # Remove empty levels at the end
tf_name = tf_name.split("/") # Convert from TF2.0 '/' separators to PyTorch '.' separators
-# Some weights have a single name withtout "/" such as final_logits_bias in BART
+# Some weights have a single name without "/" such as final_logits_bias in BART
if len(tf_name) > 1:
tf_name = tf_name[1:] # Remove level zero
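
A hedged walk-through of the name-splitting steps shown above, applied to a hypothetical TF 2.0 weight name; the real function applies additional rules (device-id stripping, kernel-to-weight renaming, transposition decisions) that this sketch omits.

import re

tf_name = "tf_model/model/encoder/layer_._0/kernel"  # hypothetical TF 2.0 weight name
tf_name = tf_name.replace("_._", "/")                # '_._' marks a level separation (list index)
tf_name = re.sub(r"//+", "/", tf_name)               # remove empty levels
parts = tf_name.split("/")                           # TF '/' separators become PyTorch '.' levels
if len(parts) > 1:
    parts = parts[1:]                                # drop the level-zero model scope
print(".".join(parts))                               # model.encoder.layer.0.kernel
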
@@ -659,7 +659,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
Args:
inputs (:obj:`Dict[str, tf.Tensor]`):
-The input of the saved model as a dictionnary of tensors.
+The input of the saved model as a dictionary of tensors.
"""
output = self.call(inputs)
@@ -944,7 +944,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
vectors from the end. If not provided or :obj:`None`, just returns None
Return:
-:obj:`tf.Variable`: Pointer to the resized decoder or None if the output embeddings are differents of the
+:obj:`tf.Variable`: Pointer to the resized decoder or None if the output embeddings are different from the
input ones.
"""
new_lm_head_decoder = old_lm_head_decoder
@@ -291,7 +291,7 @@ class ModuleUtilsMixin:
The mask indicating if we should keep the heads or not (1.0 for keep, 0.0 for discard).
num_hidden_layers (:obj:`int`):
The number of hidden layers in the model.
-is_attention_chunked: (:obj:`bool`, `optional, defaults to :obj:`False`):
+is_attention_chunked: (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not the attentions scores are computed by chunks or not.
Returns:
@@ -716,7 +716,7 @@ AutoModelForPreTraining = auto_class_factory(
"AutoModelForPreTraining", MODEL_FOR_PRETRAINING_MAPPING, head_doc="pretraining"
)
-# Private on puprose, the public class will add the deprecation warnings.
+# Private on purpose, the public class will add the deprecation warnings.
_AutoModelWithLMHead = auto_class_factory(
"AutoModelWithLMHead", MODEL_WITH_LM_HEAD_MAPPING, head_doc="language modeling"
)
@@ -103,7 +103,7 @@ FlaxAutoModelForMaskedLM = auto_class_factory(
)
FlaxAutoModelForSequenceClassification = auto_class_factory(
"AFlaxutoModelForSequenceClassification",
"FlaxAutoModelForSequenceClassification",
FLAX_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
head_doc="sequence classification",
)
@@ -469,7 +469,7 @@ TFAutoModelForPreTraining = auto_class_factory(
"TFAutoModelForPreTraining", TF_MODEL_FOR_PRETRAINING_MAPPING, head_doc="pretraining"
)
-# Private on puprose, the public class will add the deprecation warnings.
+# Private on purpose, the public class will add the deprecation warnings.
_TFAutoModelWithLMHead = auto_class_factory(
"TFAutoModelWithLMHead", TF_MODEL_WITH_LM_HEAD_MAPPING, head_doc="language modeling"
)
@@ -171,7 +171,7 @@ class BartConfig(PretrainedConfig):
self.gradient_checkpointing = gradient_checkpointing
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
-# ensure backward compatibilty for BART CNN models
+# ensure backward compatibility for BART CNN models
if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False):
self.forced_bos_token_id = self.bos_token_id
warnings.warn(
@@ -111,7 +111,7 @@ class BartLearnedPositionalEmbedding(nn.Embedding):
def __init__(self, num_embeddings: int, embedding_dim: int):
# Bart is set up so that if padding_idx is specified then offset the embedding ids by 2
-# and adjust num_embeddings appropriately. Other models dont have this hack
+# and adjust num_embeddings appropriately. Other models don't have this hack
self.offset = 2
super().__init__(num_embeddings + self.offset, embedding_dim)
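
A hedged sketch of the offset trick described in the comment above: the embedding table gets 2 extra rows and every position id is shifted by 2, so rows 0 and 1 stay reserved. The class name and forward signature below are illustrative, not Bart's actual module.

import torch
import torch.nn as nn

class LearnedPositionalEmbeddingSketch(nn.Embedding):
    def __init__(self, num_embeddings: int, embedding_dim: int):
        self.offset = 2
        super().__init__(num_embeddings + self.offset, embedding_dim)

    def forward(self, seq_len: int, past_key_values_length: int = 0):
        positions = torch.arange(past_key_values_length, past_key_values_length + seq_len)
        return super().forward(positions + self.offset)  # rows 0 and 1 are never looked up

emb = LearnedPositionalEmbeddingSketch(num_embeddings=1024, embedding_dim=16)
print(emb(seq_len=5).shape)  # torch.Size([5, 16])
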
@@ -236,9 +236,9 @@ class BartAttention(nn.Module):
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
if output_attentions:
-# this operation is a bit akward, but it's required to
+# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
-# In order to do so, attn_weights have to reshaped
+# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
@@ -116,7 +116,7 @@ class TFBartLearnedPositionalEmbedding(TFSharedEmbeddings):
def __init__(self, num_embeddings: int, embedding_dim: int, **kwargs):
# Bart is set up so that if padding_idx is specified then offset the embedding ids by 2
-# and adjust num_embeddings appropriately. Other models dont have this hack
+# and adjust num_embeddings appropriately. Other models don't have this hack
self.offset = 2
super().__init__(num_embeddings + self.offset, embedding_dim, **kwargs)
@@ -304,7 +304,7 @@ class MecabTokenizer:
class CharacterTokenizer:
"""Runs Character tokenziation."""
"""Runs Character tokenization."""
def __init__(self, vocab, unk_token, normalize_text=True):
"""