Unverified Commit 645f45c4 authored by WybeKoper, committed by GitHub
Browse files

Fixed some typos and removed legacy url (#10989)



* Fixed typos

* Removed legacy colab notebook from readme
Co-authored-by: WybeKoper <WybeKoper@users.noreply.github.com>
parent e87505f3
...@@ -129,6 +129,3 @@ python ./examples/multiple-choice/run_tf_multiple_choice.py \ ...@@ -129,6 +129,3 @@ python ./examples/multiple-choice/run_tf_multiple_choice.py \
--gradient_accumulation_steps 2 \ --gradient_accumulation_steps 2 \
--overwrite_output --overwrite_output
``` ```
# Run it in colab
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb)
...@@ -1302,10 +1302,10 @@ class GenerationMixin: ...@@ -1302,10 +1302,10 @@ class GenerationMixin:
# argmax # argmax
next_tokens = torch.argmax(next_tokens_scores, dim=-1) next_tokens = torch.argmax(next_tokens_scores, dim=-1)
# add code that transfomers next_tokens to tokens_to_add # add code that transforms next_tokens to tokens_to_add
if eos_token_id is not None: if eos_token_id is not None:
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined." assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
next_tokens = next_tokens * unfinished_sequences + (pad_token_id) * (1 - unfinished_sequences) next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
# add token and increase length by one # add token and increase length by one
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
...@@ -1526,10 +1526,10 @@ class GenerationMixin: ...@@ -1526,10 +1526,10 @@ class GenerationMixin:
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
# add code that transfomers next_tokens to tokens_to_add # add code that transforms next_tokens to tokens_to_add
if eos_token_id is not None: if eos_token_id is not None:
assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined." assert pad_token_id is not None, "If eos_token_id is defined, make sure that pad_token_id is defined."
next_tokens = next_tokens * unfinished_sequences + (pad_token_id) * (1 - unfinished_sequences) next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences)
# add token and increase length by one # add token and increase length by one
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
......
...@@ -56,7 +56,7 @@ def load_vocab(vocab_file): ...@@ -56,7 +56,7 @@ def load_vocab(vocab_file):
class XLMProphetNetTokenizer(PreTrainedTokenizer): class XLMProphetNetTokenizer(PreTrainedTokenizer):
""" """
Adapted from :class:`~transfomers.RobertaTokenizer` and class:`~transfomers.XLNetTokenizer`. Based on Adapted from :class:`~transformers.RobertaTokenizer` and class:`~transformers.XLNetTokenizer`. Based on
`SentencePiece <https://github.com/google/sentencepiece>`__. `SentencePiece <https://github.com/google/sentencepiece>`__.
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods. This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the main methods.
......
...@@ -67,7 +67,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { ...@@ -67,7 +67,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class XLMRobertaTokenizerFast(PreTrainedTokenizerFast): class XLMRobertaTokenizerFast(PreTrainedTokenizerFast):
""" """
Construct a "fast" XLM-RoBERTa tokenizer (backed by HuggingFace's `tokenizers` library). Adapted from Construct a "fast" XLM-RoBERTa tokenizer (backed by HuggingFace's `tokenizers` library). Adapted from
:class:`~transfomers.RobertaTokenizer` and class:`~transfomers.XLNetTokenizer`. Based on `BPE :class:`~transformers.RobertaTokenizer` and class:`~transformers.XLNetTokenizer`. Based on `BPE
<https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models>`__. <https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=BPE#models>`__.
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main This tokenizer inherits from :class:`~transformers.PreTrainedTokenizerFast` which contains most of the main
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment