"git@developer.sourcefind.cn:modelzoo/donut_pytorch.git" did not exist on "3c6d818db574efaad360359eea314b4ef7f73111"
Unverified Commit 89514f05 authored by Ekagra Ranjan's avatar Ekagra Ranjan Committed by GitHub
Browse files

Improve Text Generation doc (#18788)



* fix args for beam search decoding in generation utils

* fix missing PAD token in gpt2

* add PAD EOS change to TF

* Update src/transformers/generation_tf_utils.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/generation_utils.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/generation_utils.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 86387fe8
...@@ -2113,7 +2113,7 @@ class TFGenerationMixin: ...@@ -2113,7 +2113,7 @@ class TFGenerationMixin:
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2") >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = TFAutoModelForCausalLM.from_pretrained("gpt2") >>> model = TFAutoModelForCausalLM.from_pretrained("gpt2")
>>> # set pad_token_id to eos_token_id because GPT2 does not have a EOS token >>> # set pad_token_id to eos_token_id because GPT2 does not have a PAD token
>>> model.config.pad_token_id = model.config.eos_token_id >>> model.config.pad_token_id = model.config.eos_token_id
>>> input_prompt = "Today is a beautiful day, and" >>> input_prompt = "Today is a beautiful day, and"
......
...@@ -1139,7 +1139,7 @@ class GenerationMixin: ...@@ -1139,7 +1139,7 @@ class GenerationMixin:
>>> sentence = "Paris is one of the densest populated areas in Europe." >>> sentence = "Paris is one of the densest populated areas in Europe."
>>> input_ids = tokenizer(sentence, return_tensors="pt").input_ids >>> input_ids = tokenizer(sentence, return_tensors="pt").input_ids
>>> outputs = model.generate(input_ids) >>> outputs = model.generate(input_ids, num_beams=5)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Paris ist eines der dichtesten besiedelten Gebiete Europas.'] ['Paris ist eines der dichtesten besiedelten Gebiete Europas.']
```""" ```"""
...@@ -1635,7 +1635,7 @@ class GenerationMixin: ...@@ -1635,7 +1635,7 @@ class GenerationMixin:
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2") >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = AutoModelForCausalLM.from_pretrained("gpt2") >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
>>> # set pad_token_id to eos_token_id because GPT2 does not have a EOS token >>> # set pad_token_id to eos_token_id because GPT2 does not have a PAD token
>>> model.config.pad_token_id = model.config.eos_token_id >>> model.config.pad_token_id = model.config.eos_token_id
>>> input_prompt = "It might be possible to" >>> input_prompt = "It might be possible to"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment