Unverified Commit 5f3bf651 authored by NielsRogge's avatar NielsRogge Committed by GitHub
Browse files

Fix EncoderDecoderModel docs (#14197)

* Fix docs

* Apply suggestions from review + fix bug
parent ac12a5ae
...@@ -98,9 +98,9 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r""" ...@@ -98,9 +98,9 @@ ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`). :obj:`past_key_values`).
Provide for sequence to sequence training to the decoder. Indices can be obtained using For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and to the right, replacing -100 with the :obj:`pad_token_id` and prepending them with the
:meth:`transformers.PreTrainedTokenizer.__call__` for details. :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default. also be used by default.
...@@ -425,12 +425,14 @@ class EncoderDecoderModel(PreTrainedModel): ...@@ -425,12 +425,14 @@ class EncoderDecoderModel(PreTrainedModel):
>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert from pre-trained checkpoints >>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert from pre-trained checkpoints
>>> # forward
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
>>> # training >>> # training
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids) >>> model.config.decoder_start_token_id = tokenizer.cls_token_id
>>> model.config.pad_token_id = tokenizer.pad_token_id
>>> model.config.vocab_size = model.config.decoder.vocab_size
>>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt").input_ids
>>> labels = tokenizer("Salut, mon chien est mignon", return_tensors="pt").input_ids
>>> outputs = model(input_ids=input_ids, labels=labels)
>>> loss, logits = outputs.loss, outputs.logits >>> loss, logits = outputs.loss, outputs.logits
>>> # save and load from pretrained >>> # save and load from pretrained
......
...@@ -103,9 +103,9 @@ SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r""" ...@@ -103,9 +103,9 @@ SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`). :obj:`past_key_values`).
Provide for sequence to sequence training to the decoder. Indices can be obtained using For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and to the right, replacing -100 with the :obj:`pad_token_id` and prepending them with the
:meth:`transformers.PreTrainedTokenizer.__call__` for details. :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default. also be used by default.
...@@ -424,25 +424,19 @@ class SpeechEncoderDecoderModel(PreTrainedModel): ...@@ -424,25 +424,19 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
Examples:: Examples::
>>> from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor >>> from transformers import SpeechEncoderDecoderModel, Speech2Text2Processor
>>> from datasets import load_dataset
>>> import torch >>> import torch
>>> processor = Speech2Text2Processor.from_pretrained('facebook/s2t-wav2vec2-large-en-de') >>> processor = Speech2Text2Processor.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
>>> model = SpeechEncoderDecoderModel.from_pretrained('facebook/s2t-wav2vec2-large-en-de') >>> model = SpeechEncoderDecoderModel.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
>>> # process dataset
>>> def map_to_array(batch):
>>> speech, _ = sf.read(batch["file"])
>>> batch["speech"] = speech
>>> return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1 >>> input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values
>>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]]) >>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]])
>>> outputs = model(input_values=input_values, decoder_input_ids=decoder_input_ids) >>> outputs = model(input_values=input_values, decoder_input_ids=decoder_input_ids)
>>> # generation >>> # inference (generation)
>>> generated = model.generate(input_values) >>> generated = model.generate(input_values)
>>> translation = processor.batch_decode(generated) >>> translation = processor.batch_decode(generated)
......
...@@ -113,9 +113,9 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r""" ...@@ -113,9 +113,9 @@ VISION_ENCODER_DECODER_INPUTS_DOCSTRING = r"""
If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see If :obj:`past_key_values` is used, optionally only the last :obj:`decoder_input_ids` have to be input (see
:obj:`past_key_values`). :obj:`past_key_values`).
Provide for sequence to sequence training to the decoder. Indices can be obtained using For training, :obj:`decoder_input_ids` are automatically created by the model by shifting the :obj:`labels`
:class:`~transformers.PreTrainedTokenizer`. See :meth:`transformers.PreTrainedTokenizer.encode` and to the right, replacing -100 with the :obj:`pad_token_id` and prepending them with the
:meth:`transformers.PreTrainedTokenizer.__call__` for details. :obj:`decoder_start_token_id`.
decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`): decoder_attention_mask (:obj:`torch.BoolTensor` of shape :obj:`(batch_size, target_sequence_length)`, `optional`):
Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will Default behavior: generate a tensor that ignores pad tokens in :obj:`decoder_input_ids`. Causal mask will
also be used by default. also be used by default.
...@@ -428,9 +428,15 @@ class VisionEncoderDecoderModel(PreTrainedModel): ...@@ -428,9 +428,15 @@ class VisionEncoderDecoderModel(PreTrainedModel):
>>> image = Image.open(requests.get(url, stream=True).raw).convert("RGB") >>> image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
>>> # training >>> # training
>>> pixel_values = processor(image, return_tensors="pt").pixel_values # Batch size 1 >>> model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
>>> decoder_input_ids = torch.tensor([[model.config.decoder.decoder_start_token_id]]) >>> model.config.pad_token_id = processor.tokenizer.pad_token_id
>>> outputs = model(pixel_values=pixel_values, decoder_input_ids=decoder_input_ids) >>> model.config.vocab_size = model.config.decoder.vocab_size
>>> pixel_values = processor(image, return_tensors="pt").pixel_values
>>> text = "hello world"
>>> labels = processor.tokenizer(text, return_tensors="pt").input_ids
>>> outputs = model(pixel_values=pixel_values, labels=labels)
>>> loss = outputs.loss
>>> # inference (generation) >>> # inference (generation)
>>> generated_ids = model.generate(pixel_values) >>> generated_ids = model.generate(pixel_values)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment