".github/vscode:/vscode.git/clone" did not exist on "eb30a49b2028f2411514c10e432792ad581fc08b"
Unverified commit d0b942d1, authored by Yih-Dar and committed by GitHub

fix more doctests (#22292)



* fix more doctests

* fix style

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 48327c57
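Two kinds of change recur in the hunks below: the plain ``` fences become ```python so the tooling treats the examples as Python code blocks, and the indexing expressions switch from single-quoted to double-quoted keys to match the black style enforced on documentation examples. Mechanically, a doctest runner parses the `>>>` lines, executes them, and compares the printed output with the expected lines verbatim. A minimal stdlib-only sketch of that check (the names here are illustrative, not the project's actual test harness):

```python
import doctest

# An example in the same shape as the tokenizer doctests fixed below.
example = '''
>>> encoding = {"input_ids": [0, 31414, 232, 2]}
>>> encoding["input_ids"]
[0, 31414, 232, 2]
'''

# Parse the ">>>" lines and expected outputs, then execute and compare.
test = doctest.DocTestParser().get_doctest(
    example, globs={}, name="tokenizer_example", filename="<example>", lineno=0
)
results = doctest.DocTestRunner(verbose=False).run(test)
print(results)  # TestResults(failed=0, attempted=2)
```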
src/transformers/models/mvp/tokenization_mvp_fast.py
@@ -58,12 +58,14 @@ class MvpTokenizerFast(PreTrainedTokenizerFast):
 This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word will
 be encoded differently whether it is at the beginning of the sentence (without space) or not:
 
-```
+```python
 >>> from transformers import MvpTokenizerFast
+
 >>> tokenizer = MvpTokenizerFast.from_pretrained("RUCAIBox/mvp")
->>> tokenizer("Hello world")['input_ids']
+>>> tokenizer("Hello world")["input_ids"]
 [0, 31414, 232, 2]
->>> tokenizer(" Hello world")['input_ids']
+
+>>> tokenizer(" Hello world")["input_ids"]
 [0, 20920, 232, 2]
 ```

src/transformers/models/roberta/tokenization_roberta.py
@@ -111,12 +111,14 @@ class RobertaTokenizer(PreTrainedTokenizer):
 This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word will
 be encoded differently whether it is at the beginning of the sentence (without space) or not:
 
-```
+```python
 >>> from transformers import RobertaTokenizer
+
 >>> tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
->>> tokenizer("Hello world")['input_ids']
+>>> tokenizer("Hello world")["input_ids"]
 [0, 31414, 232, 2]
->>> tokenizer(" Hello world")['input_ids']
+
+>>> tokenizer(" Hello world")["input_ids"]
 [0, 20920, 232, 2]
 ```

src/transformers/models/roberta/tokenization_roberta_fast.py
@@ -81,12 +81,14 @@ class RobertaTokenizerFast(PreTrainedTokenizerFast):
 This tokenizer has been trained to treat spaces like parts of the tokens (a bit like sentencepiece) so a word will
 be encoded differently whether it is at the beginning of the sentence (without space) or not:
 
-```
+```python
 >>> from transformers import RobertaTokenizerFast
+
 >>> tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
->>> tokenizer("Hello world")['input_ids']
+>>> tokenizer("Hello world")["input_ids"]
 [0, 31414, 232, 2]
->>> tokenizer(" Hello world")['input_ids']
+
+>>> tokenizer(" Hello world")["input_ids"]
 [0, 20920, 232, 2]
 ```
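For context on the behavior these docstrings document: the byte-level BPE used by these tokenizers folds a leading space into the first token, which is why the same words yield different ids at the start of a sentence. A quick sketch of that, plus the `add_prefix_space` option the tokenizer docs describe as the workaround (requires `transformers` and a download of the roberta-base vocab; the first two outputs are taken from the diff above, the last one is my expectation, not verified here):

```python
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# The same words get different ids depending on the leading space,
# exactly as the doctest outputs above show.
print(tokenizer("Hello world")["input_ids"])   # [0, 31414, 232, 2]
print(tokenizer(" Hello world")["input_ids"])  # [0, 20920, 232, 2]

# Documented workaround: always prepend a space, so encoding no longer
# depends on sentence position (expected to match the second output).
tokenizer = RobertaTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
print(tokenizer("Hello world")["input_ids"])   # expected: [0, 20920, 232, 2]
```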

utils/documentation_tests.txt
@@ -444,3 +444,26 @@ src/transformers/models/wav2vec2/processing_wav2vec2.py
 src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py
 src/transformers/models/whisper/processing_whisper.py
 src/transformers/models/x_clip/processing_x_clip.py
+src/transformers/models/bart/tokenization_bart.py
+src/transformers/models/bart/tokenization_bart_fast.py
+src/transformers/models/blenderbot/tokenization_blenderbot.py
+src/transformers/models/blenderbot/tokenization_blenderbot_fast.py
+src/transformers/models/bloom/tokenization_bloom_fast.py
+src/transformers/models/codegen/tokenization_codegen.py
+src/transformers/models/codegen/tokenization_codegen_fast.py
+src/transformers/models/deberta/tokenization_deberta.py
+src/transformers/models/deberta/tokenization_deberta_fast.py
+src/transformers/models/gpt2/tokenization_gpt2.py
+src/transformers/models/gpt2/tokenization_gpt2_fast.py
+src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py
+src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
+src/transformers/models/jukebox/tokenization_jukebox.py
+src/transformers/models/led/tokenization_led.py
+src/transformers/models/led/tokenization_led_fast.py
+src/transformers/models/longformer/tokenization_longformer.py
+src/transformers/models/longformer/tokenization_longformer_fast.py
+src/transformers/models/luke/tokenization_luke.py
+src/transformers/models/mvp/tokenization_mvp.py
+src/transformers/models/mvp/tokenization_mvp_fast.py
+src/transformers/models/roberta/tokenization_roberta.py
+src/transformers/models/roberta/tokenization_roberta_fast.py
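The new entries register these tokenizer modules in `utils/documentation_tests.txt`, the allowlist of files whose doctests the CI actually runs (at the time, roughly via pytest's `--doctest-modules` collection over the listed paths). A rough stdlib-only equivalent of consuming such a list, assuming the paths map to importable modules (the path-to-module conversion below is my assumption, not the project's script):

```python
import doctest
import importlib

# Read the allowlist; keep only Python source entries.
with open("utils/documentation_tests.txt") as f:
    paths = [line.strip() for line in f if line.strip().endswith(".py")]

failed = attempted = 0
for path in paths:
    # e.g. src/transformers/models/roberta/tokenization_roberta.py
    #   -> transformers.models.roberta.tokenization_roberta
    name = path.removeprefix("src/").removesuffix(".py").replace("/", ".")
    module = importlib.import_module(name)
    result = doctest.testmod(module, verbose=False)
    failed += result.failed
    attempted += result.attempted

print(f"{attempted} examples run, {failed} failures")
```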