Unverified Commit a69e1850 authored by Suraj Patil, committed by GitHub

add doctests for bart-like seq2seq models (#15987)



* boom boom

* enable doctest for few seq2seq models

* add seq2seq models in documentation_tests.txt

* fix docstring blenderbot

* Apply suggestions from code review
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Apply suggestions from code review
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* fix seq classif doc sample

* don't check loss for seq classif examples

* +IGNORE_OUTPUT => +IGNORE_RESULT

* fix _SEQ_CLASS_EXPECTED_OUTPUT_SHAPE

* fix some docs

* more fixes

* last fix (hopefully)

* fix big bird gen example

* fix mbart gen example
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent b256f351
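These samples only run as doctests once their files are registered in `utils/documentation_tests.txt`, which this commit also updates. As a minimal sketch of exercising one of the touched files locally through pytest's doctest collection (the path and flags are illustrative and may differ between transformers versions):

```python
# Minimal sketch: collect and run the doctests of one modified file.
# Assumes a checkout of the repository; flags mirror pytest's standard
# doctest support.
import pytest

pytest.main(
    [
        "--doctest-modules",
        "src/transformers/models/bart/modeling_bart.py",
        "-sv",
        "--doctest-continue-on-failure",
    ]
)
```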
@@ -1012,6 +1012,8 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> from transformers import {processor_class}, {model_class}
>>> import torch

>>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT

>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}")
@@ -1022,8 +1024,16 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs.loss
>>> round(loss.item(), 2)
{expected_loss}

>>> start_scores = outputs.start_logits
>>> list(start_scores.shape)
{expected_output}

>>> end_scores = outputs.end_logits
>>> list(end_scores.shape)
{expected_output}
```
"""
@@ -1031,33 +1041,40 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
Example of single-label classification:

```python
>>> import torch
>>> from transformers import {processor_class}, {model_class}

>>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT

>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=2)

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
>>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss
>>> logits = outputs.logits
>>> list(logits.shape)
{expected_output}
```

Example of multi-label classification:

```python
>>> import torch
>>> from transformers import {processor_class}, {model_class}

>>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT

>>> tokenizer = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification", num_labels=2)

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([[1, 1]], dtype=torch.float)  # need dtype=float for BCEWithLogitsLoss
>>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss
>>> logits = outputs.logits
>>> list(logits.shape)
{expected_output}
```
"""
...
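`+IGNORE_RESULT` (standardized here in place of `+IGNORE_OUTPUT`, per the commit message) is not a built-in doctest directive, so the test harness has to register it. A sketch of how such a flag can be registered in a `conftest.py`, close to how the library's test setup does it:

```python
import doctest

# Custom flag: lines marked `# doctest: +IGNORE_RESULT` still execute, but
# their output (e.g. the torch.Generator returned by torch.manual_seed)
# is not compared against anything.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker


class CustomOutputChecker(OutputChecker):
    def check_output(self, want, got, optionflags):
        if IGNORE_RESULT & optionflags:
            return True  # accept whatever the example printed
        return OutputChecker.check_output(self, want, got, optionflags)


# Patch the checker so doctest runners (including pytest's) pick it up.
doctest.OutputChecker = CustomOutputChecker
```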
@@ -48,14 +48,24 @@ from .configuration_bart import BartConfig
logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "facebook/bart-base"
_CONFIG_FOR_DOC = "BartConfig"
_TOKENIZER_FOR_DOC = "BartTokenizer"

# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]

# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]

# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 2.98
_QA_EXPECTED_OUTPUT_SHAPE = [1, 17]

BART_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/bart-large",
    # see all BART models at https://huggingface.co/models?filter=bart
]
@@ -542,12 +552,17 @@ BART_GENERATION_EXAMPLE = r"""
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

>>> ARTICLE_TO_SUMMARIZE = (
...     "PG&E stated it scheduled the blackouts in response to forecasts for high winds "
...     "amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were "
...     "scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow."
... )
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="pt")

>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=2, max_length=20)
>>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'PG&E scheduled the blackouts in response to forecasts for high winds amid dry conditions'
```
Mask filling example:
@@ -555,10 +570,10 @@ BART_GENERATION_EXAMPLE = r"""
```python
>>> from transformers import BartTokenizer, BartForConditionalGeneration

>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
>>> model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
>>> TXT = "My friends are <mask> but they eat too many carbs."

>>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
>>> logits = model(input_ids).logits
@@ -567,6 +582,7 @@ BART_GENERATION_EXAMPLE = r"""
>>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split()
['not', 'good', 'healthy', 'great', 'very']
```
"""
@@ -641,11 +657,10 @@ BART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
`(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
can choose to directly pass an embedded representation. This is useful if you want more control over how to
convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +981,8 @@ class BartDecoder(BartPretrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
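The `past_key_values` contract these docstrings describe is easy to demonstrate; a minimal sketch of incremental decoding with BART, where only the newest decoder token is fed after the first step (the checkpoint and the greedy next-token choice are illustrative, not part of the diff):

```python
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

input_ids = tokenizer("My friends are cool", return_tensors="pt").input_ids
decoder_input_ids = torch.tensor([[model.config.decoder_start_token_id]])

# First step: full decoder prefix; ask for the cache back.
out = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, use_cache=True)
next_token = out.logits[:, -1:].argmax(-1)  # greedy pick, shape (batch_size, 1)

# Subsequent steps: pass only the last decoder token plus the cached states,
# instead of the whole `decoder_input_ids` sequence.
out = model(
    input_ids=input_ids,
    decoder_input_ids=next_token,
    past_key_values=out.past_key_values,
)
```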
@@ -1153,6 +1168,7 @@ class BartModel(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -1434,6 +1450,7 @@ class BartForSequenceClassification(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -1558,6 +1575,8 @@ class BartForQuestionAnswering(BartPretrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -1789,13 +1808,16 @@ class BartForCausalLM(BartPretrainedModel):
```python
>>> from transformers import BartTokenizer, BartForCausalLM

>>> tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
>>> model = BartForCausalLM.from_pretrained("facebook/bart-base", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...
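Putting the pieces together: the constants defined at the top of modeling_bart.py reach the docstring templates through the `add_code_sample_docstrings` decorator shown in the hunks above. A hedged, abridged sketch of the wiring for the question-answering head, with a stub class standing in for the real model and an import path that varies by transformers version:

```python
# Abridged sketch, not the verbatim source: how the new expectation constants
# are handed to the code-sample decorator. The stub class only exists so the
# decorator can infer a "QuestionAnswering" model name from __qualname__.
from transformers.file_utils import add_code_sample_docstrings  # location varies by version
from transformers.modeling_outputs import Seq2SeqQuestionAnsweringModelOutput

_TOKENIZER_FOR_DOC = "BartTokenizer"
_CHECKPOINT_FOR_DOC = "facebook/bart-base"
_CONFIG_FOR_DOC = "BartConfig"
_QA_EXPECTED_LOSS = 2.98
_QA_EXPECTED_OUTPUT_SHAPE = [1, 17]


class BartForQuestionAnswering:  # stub standing in for the real model class
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=Seq2SeqQuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_loss=_QA_EXPECTED_LOSS,  # fills {expected_loss} in the QA template
        expected_output=_QA_EXPECTED_OUTPUT_SHAPE,  # fills {expected_output}
    )
    def forward(self):
        """Stub forward; the decorator appends the rendered doctest here."""


print(BartForQuestionAnswering.forward.__doc__)
```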
@@ -53,6 +53,16 @@ _CHECKPOINT_FOR_DOC = "google/bigbird-pegasus-large-arxiv"
_CONFIG_FOR_DOC = "BigBirdPegasusConfig"
_TOKENIZER_FOR_DOC = "PegasusTokenizer"

# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 7, 1024]

# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]

# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 2.56
_QA_EXPECTED_OUTPUT_SHAPE = [1, 12]

BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "google/bigbird-pegasus-large-arxiv",
@@ -1627,12 +1637,20 @@ BIGBIRD_PEGASUS_GENERATION_EXAMPLE = r"""
>>> model = BigBirdPegasusForConditionalGeneration.from_pretrained("google/bigbird-pegasus-large-arxiv")
>>> tokenizer = PegasusTokenizer.from_pretrained("google/bigbird-pegasus-large-arxiv")

>>> ARTICLE_TO_SUMMARIZE = (
...     "The dominant sequence transduction models are based on complex recurrent or convolutional neural "
...     "networks in an encoder-decoder configuration. The best performing models also connect the encoder "
...     "and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, "
...     "based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. "
...     "Experiments on two machine translation tasks show these models to be superior in quality "
...     "while being more parallelizable and requiring significantly less time to train."
... )
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=4096, return_tensors="pt", truncation=True)

>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=15)
>>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'dominant sequence models are based on recurrent or convolutional neural networks .'
```
"""
@@ -1684,11 +1702,10 @@ BIGBIRD_PEGASUS_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
`(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
can choose to directly pass an embedded representation. This is useful if you want more control over how to
convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -2159,8 +2176,8 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
@@ -2346,6 +2363,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -2630,6 +2648,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -2755,6 +2774,8 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
...
@@ -506,20 +506,37 @@ BLENDERBOT_START_DOCSTRING = r"""
"""

BLENDERBOT_GENERATION_EXAMPLE = r"""
Conversation example:

```python
>>> from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

>>> mname = "facebook/blenderbot-400M-distill"
>>> model = BlenderbotForConditionalGeneration.from_pretrained(mname)
>>> tokenizer = BlenderbotTokenizer.from_pretrained(mname)
>>> UTTERANCE = "My friends are cool but they eat too many carbs."
>>> print("Human: ", UTTERANCE)
Human: My friends are cool but they eat too many carbs.

>>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
>>> reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
Bot: That's unfortunate. Are they trying to lose weight or are they just trying to be healthier?

>>> REPLY = "I'm not sure"
>>> print("Human: ", REPLY)
Human: I'm not sure

>>> NEXT_UTTERANCE = (
...     "My friends are cool but they eat too many carbs.</s> <s>That's unfortunate. "
...     "Are they trying to lose weight or are they just trying to be healthier?</s> "
...     "<s> I'm not sure."
... )
>>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
>>> next_reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
Bot: That's too bad. Have you tried encouraging them to change their eating habits?
```
"""
BLENDERBOT_INPUTS_DOCSTRING = r"""
@@ -586,11 +603,10 @@ BLENDERBOT_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
`(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
can choose to directly pass an embedded representation. This is useful if you want more control over how to
convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -907,8 +923,8 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
@@ -1130,13 +1146,13 @@ class BlenderbotModel(BlenderbotPreTrainedModel):
>>> model = BlenderbotModel.from_pretrained("facebook/blenderbot-400M-distill")
>>> tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")

>>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids  # Batch size 1
>>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_input_ids)

>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 6, 1280]
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -1389,7 +1405,7 @@ class BlenderbotDecoderWrapper(BlenderbotPreTrainedModel):
return self.decoder(*args, **kwargs)

# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Blenderbot, facebook/bart-base->facebook/blenderbot-400M-distill
class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
def __init__(self, config):
config = copy.deepcopy(config)
@@ -1520,6 +1536,9 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...
@@ -504,20 +504,37 @@ BLENDERBOT_SMALL_START_DOCSTRING = r"""
"""

BLENDERBOT_SMALL_GENERATION_EXAMPLE = r"""
Conversation example:

```python
>>> from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration

>>> mname = "facebook/blenderbot_small-90M"
>>> model = BlenderbotSmallForConditionalGeneration.from_pretrained(mname)
>>> tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname)
>>> UTTERANCE = "My friends are cool but they eat too many carbs."
>>> print("Human: ", UTTERANCE)
Human: My friends are cool but they eat too many carbs.

>>> inputs = tokenizer([UTTERANCE], return_tensors="pt")
>>> reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])
Bot: what kind of carbs do they eat? i don't know much about carbs.

>>> REPLY = "I'm not sure"
>>> print("Human: ", REPLY)
Human: I'm not sure

>>> NEXT_UTTERANCE = (
...     "My friends are cool but they eat too many carbs.</s> <s>what kind of carbs do they eat? "
...     "i don't know much about carbs</s> "
...     "<s> I'm not sure."
... )
>>> inputs = tokenizer([NEXT_UTTERANCE], return_tensors="pt")
>>> next_reply_ids = model.generate(**inputs)
>>> print("Bot: ", tokenizer.batch_decode(next_reply_ids, skip_special_tokens=True)[0])
Bot: they eat a lot of carbs. carbs are high in fat, protein, and carbohydrates.
```
"""
BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""
@@ -584,11 +601,10 @@ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
`(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
can choose to directly pass an embedded representation. This is useful if you want more control over how to
convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -902,8 +918,8 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
@@ -1113,13 +1129,13 @@ class BlenderbotSmallModel(BlenderbotSmallPreTrainedModel):
>>> model = BlenderbotSmallModel.from_pretrained("facebook/blenderbot_small-90M")
>>> tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M")

>>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
>>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt")  # Batch size 1
>>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 3, 512]
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -1360,7 +1376,7 @@ class BlenderbotSmallDecoderWrapper(BlenderbotSmallPreTrainedModel):
return self.decoder(*args, **kwargs)

# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->BlenderbotSmall, facebook/bart-base->facebook/blenderbot_small-90M
class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
def __init__(self, config):
config = copy.deepcopy(config)
@@ -1491,6 +1507,9 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel):
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...
@@ -523,27 +523,28 @@ MARIAN_START_DOCSTRING = r"""
"""

MARIAN_GENERATION_EXAMPLE = r"""
Pytorch version of marian-nmt's transformer.h (c++). Designed for the OPUS-NMT translation checkpoints. Available
models are listed [here](https://huggingface.co/models?search=Helsinki-NLP).

Examples:

```python
>>> from transformers import MarianTokenizer, MarianMTModel

>>> src = "fr"  # source language
>>> trg = "en"  # target language

>>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
>>> model = MarianMTModel.from_pretrained(model_name)
>>> tokenizer = MarianTokenizer.from_pretrained(model_name)

>>> sample_text = "où est l'arrêt de bus ?"
>>> batch = tokenizer([sample_text], return_tensors="pt")

>>> generated_ids = model.generate(**batch)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
"Where's the bus stop?"
```
"""

MARIAN_INPUTS_DOCSTRING = r"""
@@ -927,7 +928,7 @@ class MarianDecoder(MarianPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`.
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
This is useful if you want more control over how to convert `input_ids` indices into associated vectors
@@ -1136,17 +1137,17 @@ class MarianModel(MarianPreTrainedModel):
>>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")

>>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
>>> decoder_inputs = tokenizer(
...     "<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen",
...     return_tensors="pt",
...     add_special_tokens=False,
... )
>>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)

>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 26, 512]
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -1400,7 +1401,7 @@ class MarianDecoderWrapper(MarianPreTrainedModel):
return self.decoder(*args, **kwargs)

# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->Marian, facebook/bart-base->Helsinki-NLP/opus-mt-fr-en
class MarianForCausalLM(MarianPreTrainedModel):
def __init__(self, config):
config = copy.deepcopy(config)
@@ -1529,6 +1530,9 @@ class MarianForCausalLM(MarianPreTrainedModel):
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...
@@ -51,6 +51,16 @@ _CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25"
_CONFIG_FOR_DOC = "MBartConfig"
_TOKENIZER_FOR_DOC = "MBartTokenizer"

# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024]

# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]

# QuestionAnswering docstring
_QA_EXPECTED_LOSS = 3.04
_QA_EXPECTED_OUTPUT_SHAPE = [1, 16]

MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/mbart-large-cc25",
@@ -532,20 +542,21 @@ MBART_START_DOCSTRING = r"""
"""

MBART_GENERATION_EXAMPLE = r"""
Translation example:

```python
>>> from transformers import MBartTokenizer, MBartForConditionalGeneration

>>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
>>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro")

>>> example_english_phrase = "42 is the answer"
>>> inputs = tokenizer(example_english_phrase, return_tensors="pt")

>>> # Translate
>>> generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'42 este răspuns'
```

Mask filling example:
@@ -567,6 +578,7 @@ MBART_GENERATION_EXAMPLE = r"""
>>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split()
['nett', 'sehr', 'ganz', 'nicht', 'so']
```
"""
@@ -639,11 +651,10 @@ MBART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
`(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
can choose to directly pass an embedded representation. This is useful if you want more control over how to
convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
@@ -966,8 +977,8 @@ class MBartDecoder(MBartPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix.
@@ -1153,6 +1164,7 @@ class MBartModel(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_EXPECTED_OUTPUT_SHAPE,
)
def forward(
self,
@@ -1428,6 +1440,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
)
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward(
@@ -1553,6 +1566,8 @@ class MBartForQuestionAnswering(MBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT_SHAPE,
)
# Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward
def forward(
@@ -1665,7 +1680,7 @@ class MBartDecoderWrapper(MBartPreTrainedModel):
return self.decoder(*args, **kwargs)

# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25
class MBartForCausalLM(MBartPreTrainedModel):
def __init__(self, config):
config = copy.deepcopy(config)
@@ -1794,6 +1809,9 @@ class MBartForCausalLM(MBartPreTrainedModel):
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...
@@ -529,7 +529,8 @@ PEGASUS_GENERATION_EXAMPLE = r"""
>>> # Generate Summary
>>> summary_ids = model.generate(inputs["input_ids"])
>>> tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"California's largest electricity provider has turned off power to hundreds of thousands of customers."
```
"""
...@@ -597,11 +598,10 @@ PEGASUS_INPUTS_DOCSTRING = r""" ...@@ -597,11 +598,10 @@ PEGASUS_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of shape
shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you
you can choose to directly pass an embedded representation. This is useful if you want more control over can choose to directly pass an embedded representation. This is useful if you want more control over how to
how to convert `input_ids` indices into associated vectors than the model's internal embedding lookup convert `input_ids` indices into associated vectors than the model's internal embedding lookup matrix.
matrix.
decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
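As a concrete illustration of the incremental-decoding contract described in this docstring, here is a rough sketch using the Pegasus checkpoint from the surrounding examples (the prompt and the greedy next-token choice are arbitrary; in practice `generate()` handles this caching loop for you):

```python
>>> from transformers import PegasusTokenizer, PegasusForConditionalGeneration
>>> import torch

>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
>>> model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-large")

>>> inputs = tokenizer("Studies have shown that owning a dog is good for you", return_tensors="pt")
>>> decoder_input_ids = torch.tensor([[model.config.decoder_start_token_id]])

>>> # first call: no cache yet, so the full decoder prefix is passed
>>> outputs = model(**inputs, decoder_input_ids=decoder_input_ids, use_cache=True)

>>> # later calls: only the newest decoder token, of shape (batch_size, 1), is
>>> # passed along with the cached key/value states (in practice you would also
>>> # pass `encoder_outputs` back in instead of re-encoding the input)
>>> next_token = outputs.logits[:, -1:].argmax(-1)
>>> outputs = model(
...     **inputs,
...     decoder_input_ids=next_token,
...     past_key_values=outputs.past_key_values,
...     use_cache=True,
... )
```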
...@@ -977,8 +977,8 @@ class PegasusDecoder(PegasusPreTrainedModel): ...@@ -977,8 +977,8 @@ class PegasusDecoder(PegasusPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
...@@ -1211,13 +1211,13 @@ class PegasusModel(PegasusPreTrainedModel): ...@@ -1211,13 +1211,13 @@ class PegasusModel(PegasusPreTrainedModel):
>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large") >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
>>> model = PegasusModel.from_pretrained("google/pegasus-large") >>> model = PegasusModel.from_pretrained("google/pegasus-large")
>>> input_ids = tokenizer( >>> inputs = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt")
... "Studies have been shown that owning a dog is good for you", return_tensors="pt" >>> decoder_inputs = tokenizer("Studies show that", return_tensors="pt")
>>> ).input_ids # Batch size 1 >>> outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_inputs.input_ids)
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 4, 1024]
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
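In the expected shape `[1, 4, 1024]`, 1 is the batch size, 4 the length of the tokenized decoder prompt, and 1024 the hidden size (`d_model`) of google/pegasus-large; since `last_hidden_state` is a decoder output, its sequence dimension tracks `decoder_input_ids` rather than the encoder input.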
...@@ -1540,7 +1540,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel): ...@@ -1540,7 +1540,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
self.model.decoder.resize_position_embeddings(new_num_position_embeddings) self.model.decoder.resize_position_embeddings(new_num_position_embeddings)
@replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-large->google/pegasus-large # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-base->google/pegasus-large
def forward( def forward(
self, self,
input_ids=None, input_ids=None,
...@@ -1637,6 +1637,9 @@ class PegasusForCausalLM(PegasusPreTrainedModel): ...@@ -1637,6 +1637,9 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
......
...@@ -50,6 +50,12 @@ _CHECKPOINT_FOR_DOC = "uclanlp/plbart-base" ...@@ -50,6 +50,12 @@ _CHECKPOINT_FOR_DOC = "uclanlp/plbart-base"
_CONFIG_FOR_DOC = "PLBartConfig" _CONFIG_FOR_DOC = "PLBartConfig"
_TOKENIZER_FOR_DOC = "PLBartTokenizer" _TOKENIZER_FOR_DOC = "PLBartTokenizer"
# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]
# SequenceClassification docstring
_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE = [1, 2]
PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"uclanlp/plbart-base", "uclanlp/plbart-base",
...@@ -526,27 +532,26 @@ PLBART_START_DOCSTRING = r""" ...@@ -526,27 +532,26 @@ PLBART_START_DOCSTRING = r"""
""" """
PLBART_GENERATION_EXAMPLE = r""" PLBART_GENERATION_EXAMPLE = r"""
Token in-filling example:

>>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration, PLBartConfig
>>> model = PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base')
>>> tokenizer = PLBartTokenizer.from_pretrained('uclanlp/plbart-base', src_lang='java', tgt_lang='java')
>>> METHOD_TO_FILL = "public static main (String args[0]) { data=Date(); System.out. String.format("Current Date : % tc", ));}"
>>> inputs = tokenizer([METHOD_TO_FILL], max_length=1024, return_tensors='pt')
>>> # Generate Filled Code
>>> generated_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=5, early_stopping=True)
>>> print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in generated_ids])

Mask-filling example:

>>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration
>>> tokenizer = PLBartTokenizer.from_pretrained('uclanlp/plbart-base')
>>> # en_XX is the language symbol id <LID> for English
>>> TXT = "</s> Is 0 the <mask> Fibonacci <mask> ? </s> en_XX"
>>> model = PLBartForConditionalGeneration.from_pretrained('uclanlp/plbart-base')
>>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors='pt')['input_ids']
>>> logits = model(input_ids).logits
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)
>>> tokenizer.decode(predictions).split()
Mask-filling example:

```python
>>> from transformers import PLBartTokenizer, PLBartForConditionalGeneration

>>> model = PLBartForConditionalGeneration.from_pretrained("uclanlp/plbart-base")
>>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")

>>> # en_XX is the language symbol id <LID> for English
>>> TXT = "<s> Is 0 the <mask> Fibonacci number ? </s> en_XX"

>>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt").input_ids
>>> logits = model(input_ids).logits
>>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
>>> probs = logits[0, masked_index].softmax(dim=0)
>>> values, predictions = probs.topk(5)

>>> tokenizer.decode(predictions).split()
['same', 'first', 'highest', 'result', 'Fib']
```
""" """
PLBART_INPUTS_DOCSTRING = r""" PLBART_INPUTS_DOCSTRING = r"""
...@@ -619,7 +624,7 @@ PLBART_INPUTS_DOCSTRING = r""" ...@@ -619,7 +624,7 @@ PLBART_INPUTS_DOCSTRING = r"""
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
``decoder_input_ids``` of shape `(batch_size, sequence_length)`. `decoder_input_ids` of shape `(batch_size, sequence_length)`.
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful
...@@ -948,8 +953,8 @@ class PLBartDecoder(PLBartPreTrainedModel): ...@@ -948,8 +953,8 @@ class PLBartDecoder(PLBartPreTrainedModel):
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those
that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of
all ``decoder_input_ids``` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` all `decoder_input_ids` of shape `(batch_size, sequence_length)`. inputs_embeds (`torch.FloatTensor` of
of shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing shape `(batch_size, sequence_length, hidden_size)`, *optional*): Optionally, instead of passing
`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more
control over how to convert `input_ids` indices into associated vectors than the model's internal control over how to convert `input_ids` indices into associated vectors than the model's internal
embedding lookup matrix. embedding lookup matrix.
...@@ -1406,6 +1411,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel): ...@@ -1406,6 +1411,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel):
checkpoint=_CHECKPOINT_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput, output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC, config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT_SHAPE,
) )
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward( def forward(
...@@ -1521,7 +1527,7 @@ class PLBartDecoderWrapper(PLBartPreTrainedModel): ...@@ -1521,7 +1527,7 @@ class PLBartDecoderWrapper(PLBartPreTrainedModel):
return self.decoder(*args, **kwargs) return self.decoder(*args, **kwargs)
# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart # Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->PLBart, facebook/bart-base->uclanlp/plbart-base
class PLBartForCausalLM(PLBartPreTrainedModel): class PLBartForCausalLM(PLBartPreTrainedModel):
def __init__(self, config): def __init__(self, config):
config = copy.deepcopy(config) config = copy.deepcopy(config)
...@@ -1643,13 +1649,16 @@ class PLBartForCausalLM(PLBartPreTrainedModel): ...@@ -1643,13 +1649,16 @@ class PLBartForCausalLM(PLBartPreTrainedModel):
```python ```python
>>> from transformers import PLBartTokenizer, PLBartForCausalLM >>> from transformers import PLBartTokenizer, PLBartForCausalLM
>>> tokenizer = PLBartTokenizer.from_pretrained("facebook/bart-large") >>> tokenizer = PLBartTokenizer.from_pretrained("uclanlp/plbart-base")
>>> model = PLBartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False) >>> model = PLBartForCausalLM.from_pretrained("uclanlp/plbart-base", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size]
>>> list(logits.shape) == expected_shape
True
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
......
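The causal-LM examples across these files share one pattern worth spelling out: loading a seq2seq checkpoint into a `*ForCausalLM` class keeps only the decoder weights, and `add_cross_attention=False` drops the cross-attention layers so the decoder runs as a standalone language model, which is what the `assert model.config.is_decoder` guard makes explicit.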
...@@ -20,5 +20,13 @@ src/transformers/models/poolformer/modeling_poolformer.py ...@@ -20,5 +20,13 @@ src/transformers/models/poolformer/modeling_poolformer.py
src/transformers/models/vit_mae/modeling_vit_mae.py src/transformers/models/vit_mae/modeling_vit_mae.py
src/transformers/models/segformer/modeling_segformer.py src/transformers/models/segformer/modeling_segformer.py
src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py
src/transformers/models/bart/modeling_bart.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
src/transformers/models/marian/modeling_marian.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/blenderbot/modeling_blenderbot.py
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
src/transformers/models/plbart/modeling_plbart.py
docs/source/quicktour.mdx docs/source/quicktour.mdx
docs/source/task_summary.mdx docs/source/task_summary.mdx
\ No newline at end of file
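Modules listed in documentation_tests.txt have the examples in their docstrings executed as doctests by the test harness. As a rough, self-contained sketch of what that amounts to for one of the newly added files (the real harness lives in the repository's test setup; this only shows the underlying mechanism):

```python
import doctest

# importing the module makes its docstring examples visible to doctest
import transformers.models.plbart.modeling_plbart as plbart_modeling

# run every `>>>` example found in the module's docstrings and compare the
# produced output with the expected output written below each statement;
# note that actually executing this downloads the checkpoints the examples use
results = doctest.testmod(plbart_modeling, verbose=False)
print(f"{results.attempted} examples run, {results.failed} failures")
```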