Unverified commit b5e2b183 authored by Sylvain Gugger, committed by GitHub

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
parent e13f72fb
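
The core of the change is the black pass added to the doc styler: every fenced `python` example block in a docstring (or MDX file) has its doctest prompts stripped, is formatted by black at the repository's 119-character line length, and then gets the prompts restored, `>>> ` on the first line of each statement and `... ` on continuations. A minimal sketch of such a pass, assuming blocks contain only prompted lines — the helper below is illustrative, not the actual `style_doc.py` code:

```python
import black


def blackify_doctest_block(block: str, line_length: int = 119) -> str:
    """Sketch: format a python doctest block with black, then restore the prompts."""
    # Strip the ">>> " / "... " prompts so black sees plain Python source.
    source = "\n".join(line[4:] for line in block.splitlines())
    formatted = black.format_str(source, mode=black.Mode(line_length=line_length))

    # Restore prompts: ">>> " on the first line of a statement, "... " on
    # continuation lines, tracked with a crude bracket counter (it ignores
    # brackets inside string literals, which is fine for a sketch).
    out, depth = [], 0
    for line in formatted.rstrip("\n").splitlines():
        if not line:
            out.append("")  # don't add spaces to empty lines
        elif depth > 0 or line.startswith(" "):
            out.append("... " + line)
        else:
            out.append(">>> " + line)
        depth += sum(line.count(c) for c in "([{") - sum(line.count(c) for c in ")]}")
    return "\n".join(out)
```

The hunks below are the mechanical result of running this kind of pass over the model docstrings, plus hand fixes for examples that were broken to begin with.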
@@ -96,10 +96,13 @@ class MBartTokenizerFast(XLMRobertaTokenizerFast):
 ```python
 >>> from transformers import MBartTokenizerFast
->>> tokenizer = MBartTokenizerFast.from_pretrained('facebook/mbart-large-en-ro', src_lang="en_XX", tgt_lang="ro_RO")
+>>> tokenizer = MBartTokenizerFast.from_pretrained(
+...     "facebook/mbart-large-en-ro", src_lang="en_XX", tgt_lang="ro_RO"
+... )
 >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
 >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
->>> inputs = tokenizer(example_english_phrase, return_tensors="pt)
+>>> inputs = tokenizer(example_english_phrase, return_tensors="pt")
 >>> with tokenizer.as_target_tokenizer():
 ...     labels = tokenizer(expected_translation_romanian, return_tensors="pt")
 >>> inputs["labels"] = labels["input_ids"]
@@ -96,12 +96,13 @@ class MBart50Tokenizer(PreTrainedTokenizer):
 ```python
 >>> from transformers import MBart50Tokenizer
 >>> tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO")
 >>> src_text = " UN Chief Says There Is No Military Solution in Syria"
 >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria"
 >>> model_inputs = tokenizer(src_text, return_tensors="pt")
 >>> with tokenizer.as_target_tokenizer():
 ...     labels = tokenizer(tgt_text, return_tensors="pt").input_ids
 >>> # model(**model_inputs, labels=labels) should work
 ```"""
@@ -91,12 +91,13 @@ class MBart50TokenizerFast(PreTrainedTokenizerFast):
 ```python
 >>> from transformers import MBart50TokenizerFast
 >>> tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50", src_lang="en_XX", tgt_lang="ro_RO")
 >>> src_text = " UN Chief Says There Is No Military Solution in Syria"
 >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria"
 >>> model_inputs = tokenizer(src_text, return_tensors="pt")
 >>> with tokenizer.as_target_tokenizer():
 ...     labels = tokenizer(tgt_text, return_tensors="pt").input_ids
 >>> # model(**model_inputs, labels=labels) should work
 ```"""
@@ -1063,8 +1063,8 @@ class MegatronBertForPreTraining(MegatronBertPreTrainedModel):
 >>> from transformers import BertTokenizer, MegatronBertForPreTraining
 >>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('nvidia/megatron-bert-cased-345m')
->>> model = MegatronBertForPreTraining.from_pretrained('nvidia/megatron-bert-cased-345m')
+>>> tokenizer = BertTokenizer.from_pretrained("nvidia/megatron-bert-cased-345m")
+>>> model = MegatronBertForPreTraining.from_pretrained("nvidia/megatron-bert-cased-345m")
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1187,8 +1187,8 @@ class MegatronBertForCausalLM(MegatronBertPreTrainedModel):
 >>> from transformers import BertTokenizer, MegatronBertForCausalLM, MegatronBertConfig
 >>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('nvidia/megatron-bert-cased-345m')
->>> model = MegatronBertForCausalLM.from_pretrained('nvidia/megatron-bert-cased-345m', is_decoder=True)
+>>> tokenizer = BertTokenizer.from_pretrained("nvidia/megatron-bert-cased-345m")
+>>> model = MegatronBertForCausalLM.from_pretrained("nvidia/megatron-bert-cased-345m", is_decoder=True)
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1413,16 +1413,16 @@ class MegatronBertForNextSentencePrediction(MegatronBertPreTrainedModel):
 >>> from transformers import BertTokenizer, MegatronBertForNextSentencePrediction
 >>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('nvidia/megatron-bert-cased-345m')
->>> model = MegatronBertForNextSentencePrediction.from_pretrained('nvidia/megatron-bert-cased-345m')
+>>> tokenizer = BertTokenizer.from_pretrained("nvidia/megatron-bert-cased-345m")
+>>> model = MegatronBertForNextSentencePrediction.from_pretrained("nvidia/megatron-bert-cased-345m")
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
->>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
+>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
 >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
 >>> logits = outputs.logits
 >>> assert logits[0, 0] < logits[0, 1] # next sentence was random
 ```"""
 if "next_sentence_label" in kwargs:
@@ -214,7 +214,7 @@ class MMBTModel(nn.Module, ModuleUtilsMixin):
 ```python
 # For example purposes. Not runnable.
-transformer = BertModel.from_pretrained('bert-base-uncased')
+transformer = BertModel.from_pretrained("bert-base-uncased")
 encoder = ImageEncoder(args)
 mmbt = MMBTModel(config, transformer, encoder)
 ```"""
@@ -334,7 +334,7 @@ class MMBTForClassification(nn.Module):
 ```python
 # For example purposes. Not runnable.
-transformer = BertModel.from_pretrained('bert-base-uncased')
+transformer = BertModel.from_pretrained("bert-base-uncased")
 encoder = ImageEncoder(args)
 model = MMBTForClassification(config, transformer, encoder)
 outputs = model(input_modal, input_ids, labels=labels)
@@ -962,7 +962,9 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
 >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
 >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
->>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
+>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
+...     0
+... ) # Batch size 1
 >>> outputs = model(input_ids)
 >>> prediction_logits = outputs.prediction_logits
@@ -1147,12 +1149,12 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
 >>> from transformers import MobileBertTokenizer, MobileBertForNextSentencePrediction
 >>> import torch
->>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
->>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
+>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
+>>> model = MobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
->>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
+>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
 >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
 >>> loss = outputs.loss
@@ -1038,8 +1038,8 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel):
 >>> import tensorflow as tf
 >>> from transformers import MobileBertTokenizer, TFMobileBertForPreTraining
->>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
->>> model = TFMobileBertForPreTraining.from_pretrained('google/mobilebert-uncased')
+>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
+>>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
 >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
 >>> outputs = model(input_ids)
 >>> prediction_scores, seq_relationship_scores = outputs[:2]
@@ -1250,14 +1250,14 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS
 >>> import tensorflow as tf
 >>> from transformers import MobileBertTokenizer, TFMobileBertForNextSentencePrediction
->>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
->>> model = TFMobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
+>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
+>>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
->>> encoding = tokenizer(prompt, next_sentence, return_tensors='tf')
+>>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")
->>> logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]
+>>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
 ```"""
 inputs = input_processing(
     func=self.call,
@@ -34,6 +34,7 @@ class MT5Model(T5Model):
 ```python
 >>> from transformers import MT5Model, T5Tokenizer
+
 >>> model = MT5Model.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -67,6 +68,7 @@ class MT5ForConditionalGeneration(T5ForConditionalGeneration):
 ```python
 >>> from transformers import MT5ForConditionalGeneration, T5Tokenizer
+
 >>> model = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -75,7 +77,7 @@ class MT5ForConditionalGeneration(T5ForConditionalGeneration):
 >>> with tokenizer.as_target_tokenizer():
 ...     labels = tokenizer(summary, return_tensors="pt")
->>> outputs = model(**inputs,labels=labels["input_ids"])
+>>> outputs = model(**inputs, labels=labels["input_ids"])
 >>> loss = outputs.loss
 ```"""
@@ -98,6 +100,7 @@ class MT5EncoderModel(T5EncoderModel):
 ```python
 >>> from transformers import MT5EncoderModel, T5Tokenizer
+
 >>> model = MT5EncoderModel.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -34,6 +34,7 @@ class TFMT5Model(TFT5Model):
 ```python
 >>> from transformers import TFMT5Model, T5Tokenizer
+
 >>> model = TFMT5Model.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -58,6 +59,7 @@ class TFMT5ForConditionalGeneration(TFT5ForConditionalGeneration):
 ```python
 >>> from transformers import TFMT5ForConditionalGeneration, T5Tokenizer
+
 >>> model = TFMT5ForConditionalGeneration.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -66,7 +68,7 @@ class TFMT5ForConditionalGeneration(TFT5ForConditionalGeneration):
 >>> with tokenizer.as_target_tokenizer():
 ...     labels = tokenizer(summary, return_tensors="tf")
->>> outputs = model(**inputs,labels=labels["input_ids"])
+>>> outputs = model(**inputs, labels=labels["input_ids"])
 >>> loss = outputs.loss
 ```"""
@@ -83,6 +85,7 @@ class TFMT5EncoderModel(TFT5EncoderModel):
 ```python
 >>> from transformers import TFMT5EncoderModel, T5Tokenizer
+
 >>> model = TFMT5EncoderModel.from_pretrained("google/mt5-small")
 >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
 >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
@@ -675,14 +675,16 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
 >>> from transformers import OpenAIGPTTokenizer, OpenAIGPTDoubleHeadsModel
 >>> import torch
->>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
->>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
->>> tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!)
+>>> tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
+>>> model = OpenAIGPTDoubleHeadsModel.from_pretrained("openai-gpt")
+>>> tokenizer.add_special_tokens(
+...     {"cls_token": "[CLS]"}
+... ) # Add a [CLS] to the vocabulary (we should train it also!)
 >>> model.resize_token_embeddings(len(tokenizer))
 >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
 >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
->>> mc_token_ids = torch.tensor([input_ids.size(-1)-1, input_ids.size(-1)-1]).unsqueeze(0) # Batch size 1
+>>> mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0) # Batch size 1
 >>> outputs = model(input_ids, mc_token_ids=mc_token_ids)
 >>> lm_logits = outputs.lm_logits
@@ -726,18 +726,22 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
 >>> import tensorflow as tf
 >>> from transformers import OpenAIGPTTokenizer, TFOpenAIGPTDoubleHeadsModel
->>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
->>> model = TFOpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
+>>> tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt")
+>>> model = TFOpenAIGPTDoubleHeadsModel.from_pretrained("openai-gpt")
 >>> # Add a [CLS] to the vocabulary (we should train it also!)
->>> tokenizer.add_special_tokens({'cls_token': '[CLS]'})
+>>> tokenizer.add_special_tokens({"cls_token": "[CLS]"})
 >>> model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
 >>> print(tokenizer.cls_token_id, len(tokenizer)) # The newly added token is the last token of the vocabulary
 >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
 >>> encoding = tokenizer(choices, return_tensors="tf")
 >>> inputs = {k: tf.expand_dims(v, 0) for k, v in encoding.items()}
->>> inputs["mc_token_ids"]= tf.constant([inputs["input_ids"].shape[-1] - 1, inputs["input_ids"].shape[-1] - 1])[None, :] # Batch size 1
+>>> inputs["mc_token_ids"] = tf.constant(
+...     [inputs["input_ids"].shape[-1] - 1, inputs["input_ids"].shape[-1] - 1]
+... )[
+...     None, :
+... ] # Batch size 1
 >>> outputs = model(inputs)
 >>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
 ```"""
@@ -994,11 +994,11 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel):
 ```python
 >>> from transformers import PegasusTokenizer, FlaxPegasusForConditionalGeneration
->>> model = FlaxPegasusForConditionalGeneration.from_pretrained('google/pegasus-large')
->>> tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-large')
+>>> model = FlaxPegasusForConditionalGeneration.from_pretrained("google/pegasus-large")
+>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
 >>> text = "My friends are cool but they eat too many carbs."
->>> inputs = tokenizer(text, max_length=1024, return_tensors='np')
+>>> inputs = tokenizer(text, max_length=1024, return_tensors="np")
 >>> encoder_outputs = model.encode(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1060,11 +1060,11 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel):
 ```python
 >>> from transformers import PegasusTokenizer, FlaxPegasusForConditionalGeneration
->>> model = FlaxPegasusForConditionalGeneration.from_pretrained('google/pegasus-large')
->>> tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-large')
+>>> model = FlaxPegasusForConditionalGeneration.from_pretrained("google/pegasus-large")
+>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
 >>> text = "My friends are cool but they eat too many carbs."
->>> inputs = tokenizer(text, max_length=1024, return_tensors='np')
+>>> inputs = tokenizer(text, max_length=1024, return_tensors="np")
 >>> encoder_outputs = model.encode(**inputs)
 >>> decoder_start_token_id = model.config.decoder_start_token_id
@@ -1329,11 +1329,11 @@ class FlaxPegasusForConditionalGeneration(FlaxPegasusPreTrainedModel):
 ```python
 >>> from transformers import PegasusTokenizer, FlaxPegasusForConditionalGeneration
->>> model = FlaxPegasusForConditionalGeneration.from_pretrained('google/pegasus-large')
->>> tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-large')
+>>> model = FlaxPegasusForConditionalGeneration.from_pretrained("google/pegasus-large")
+>>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
 >>> text = "My friends are cool but they eat too many carbs."
->>> inputs = tokenizer(text, max_length=1024, return_tensors='np')
+>>> inputs = tokenizer(text, max_length=1024, return_tensors="np")
 >>> encoder_outputs = model.encode(**inputs)
 >>> decoder_start_token_id = model.config.decoder_start_token_id
@@ -1206,7 +1206,9 @@ class PegasusModel(PegasusPreTrainedModel):
 >>> tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
 >>> model = PegasusModel.from_pretrained("google/pegasus-large")
->>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
+>>> input_ids = tokenizer(
+...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
+... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -1620,8 +1622,8 @@ class PegasusForCausalLM(PegasusPreTrainedModel):
 ```python
 >>> from transformers import PegasusTokenizer, PegasusForCausalLM
->>> tokenizer = PegasusTokenizer.from_pretrained('facebook/bart-large')
->>> model = PegasusForCausalLM.from_pretrained('facebook/bart-large', add_cross_attention=False)
+>>> tokenizer = PegasusTokenizer.from_pretrained("facebook/bart-large")
+>>> model = PegasusForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False)
 >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -773,7 +773,11 @@ class PerceiverModel(PerceiverPreTrainedModel):
 ```python
 >>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel
->>> from transformers.models.perceiver.modeling_perceiver import PerceiverTextPreprocessor, PerceiverImagePreprocessor, PerceiverClassificationDecoder
+>>> from transformers.models.perceiver.modeling_perceiver import (
+...     PerceiverTextPreprocessor,
+...     PerceiverImagePreprocessor,
+...     PerceiverClassificationDecoder,
+... )
 >>> import torch
 >>> import requests
 >>> from PIL import Image
@@ -785,10 +789,12 @@ class PerceiverModel(PerceiverPreTrainedModel):
 >>> # using trainable position embeddings
 >>> config = PerceiverConfig()
 >>> preprocessor = PerceiverTextPreprocessor(config)
->>> decoder = PerceiverClassificationDecoder(config,
-...     num_channels=config.d_latents,
-...     trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
-...     use_query_residual=True)
+>>> decoder = PerceiverClassificationDecoder(
+...     config,
+...     num_channels=config.d_latents,
+...     trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
+...     use_query_residual=True,
+... )
 >>> model = PerceiverModel(config, input_preprocessor=preprocessor, decoder=decoder)
 >>> # you can then do a forward pass as follows:
@@ -797,7 +803,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
 >>> inputs = tokenizer(text, return_tensors="pt").input_ids
 >>> with torch.no_grad():
->>>     outputs = model(inputs=inputs)
+...     outputs = model(inputs=inputs)
 >>> logits = outputs.logits
 >>> # to train, one can train the model using standard cross-entropy:
@@ -808,37 +814,39 @@ class PerceiverModel(PerceiverPreTrainedModel):
 >>> # EXAMPLE 2: using the Perceiver to classify images
 >>> # - we define an ImagePreprocessor, which can be used to embed images
->>> preprocessor=PerceiverImagePreprocessor(
-...     config,
-...     prep_type="conv1x1",
-...     spatial_downsample=1,
-...     out_channels=256,
-...     position_encoding_type="trainable",
-...     concat_or_add_pos="concat",
-...     project_pos_dim=256,
-...     trainable_position_encoding_kwargs=dict(num_channels=256, index_dims=config.image_size ** 2,
-...     ),
+>>> preprocessor = PerceiverImagePreprocessor(
+...     config,
+...     prep_type="conv1x1",
+...     spatial_downsample=1,
+...     out_channels=256,
+...     position_encoding_type="trainable",
+...     concat_or_add_pos="concat",
+...     project_pos_dim=256,
+...     trainable_position_encoding_kwargs=dict(
+...         num_channels=256,
+...         index_dims=config.image_size ** 2,
+...     ),
+... )
 >>> model = PerceiverModel(
 ...     config,
 ...     input_preprocessor=preprocessor,
 ...     decoder=PerceiverClassificationDecoder(
 ...         config,
 ...         num_channels=config.d_latents,
 ...         trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
 ...         use_query_residual=True,
 ...     ),
 ... )
 >>> # you can then do a forward pass as follows:
 >>> feature_extractor = PerceiverFeatureExtractor()
->>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
 >>> inputs = feature_extractor(image, return_tensors="pt").pixel_values
 >>> with torch.no_grad():
->>>     outputs = model(inputs=inputs)
+...     outputs = model(inputs=inputs)
 >>> logits = outputs.logits
 >>> # to train, one can train the model using standard cross-entropy:
@@ -1001,14 +1009,14 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
 >>> from transformers import PerceiverTokenizer, PerceiverForMaskedLM
 >>> import torch
->>> tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
->>> model = PerceiverForMaskedLM.from_pretrained('deepmind/language-perceiver')
+>>> tokenizer = PerceiverTokenizer.from_pretrained("deepmind/language-perceiver")
+>>> model = PerceiverForMaskedLM.from_pretrained("deepmind/language-perceiver")
 >>> # training
 >>> text = "This is an incomplete sentence where some words are missing."
 >>> inputs = tokenizer(text, padding="max_length", return_tensors="pt")
 >>> # mask " missing."
->>> inputs['input_ids'][0, 52:61] = tokenizer.mask_token_id
+>>> inputs["input_ids"][0, 52:61] = tokenizer.mask_token_id
 >>> labels = tokenizer(text, padding="max_length", return_tensors="pt").input_ids
 >>> outputs = model(**inputs, labels=labels)
@@ -1020,11 +1028,11 @@ class PerceiverForMaskedLM(PerceiverPreTrainedModel):
 >>> encoding = tokenizer(text, padding="max_length", return_tensors="pt")
 >>> # mask bytes corresponding to " missing.". Note that the model performs much better if the masked span starts with a space.
->>> encoding['input_ids'][0, 52:61] = tokenizer.mask_token_id
+>>> encoding["input_ids"][0, 52:61] = tokenizer.mask_token_id
 >>> # forward pass
 >>> with torch.no_grad():
->>>     outputs = model(**encoding)
+...     outputs = model(**encoding)
 >>> logits = outputs.logits
 >>> masked_tokens_predictions = logits[0, 52:61].argmax(dim=-1).tolist()
@@ -1117,8 +1125,8 @@ class PerceiverForSequenceClassification(PerceiverPreTrainedModel):
 ```python
 >>> from transformers import PerceiverTokenizer, PerceiverForSequenceClassification
->>> tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
->>> model = PerceiverForSequenceClassification.from_pretrained('deepmind/language-perceiver')
+>>> tokenizer = PerceiverTokenizer.from_pretrained("deepmind/language-perceiver")
+>>> model = PerceiverForSequenceClassification.from_pretrained("deepmind/language-perceiver")
 >>> text = "hello world"
 >>> inputs = tokenizer(text, return_tensors="pt").input_ids
@@ -1252,11 +1260,11 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
 >>> from PIL import Image
 >>> import requests
->>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained('deepmind/vision-perceiver-learned')
->>> model = PerceiverForImageClassificationLearned.from_pretrained('deepmind/vision-perceiver-learned')
+>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-learned")
+>>> model = PerceiverForImageClassificationLearned.from_pretrained("deepmind/vision-perceiver-learned")
 >>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
 >>> outputs = model(inputs=inputs)
@@ -1389,11 +1397,11 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel):
 >>> from PIL import Image
 >>> import requests
->>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained('deepmind/vision-perceiver-fourier')
->>> model = PerceiverForImageClassificationFourier.from_pretrained('deepmind/vision-perceiver-fourier')
+>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-fourier")
+>>> model = PerceiverForImageClassificationFourier.from_pretrained("deepmind/vision-perceiver-fourier")
 >>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
 >>> outputs = model(inputs=inputs)
@@ -1526,11 +1534,11 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel):
 >>> from PIL import Image
 >>> import requests
->>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained('deepmind/vision-perceiver-conv')
->>> model = PerceiverForImageClassificationConvProcessing.from_pretrained('deepmind/vision-perceiver-conv')
+>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-conv")
+>>> model = PerceiverForImageClassificationConvProcessing.from_pretrained("deepmind/vision-perceiver-conv")
 >>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
 >>> outputs = model(inputs=inputs)
@@ -1676,7 +1684,7 @@ class PerceiverForOpticalFlow(PerceiverPreTrainedModel):
 >>> from transformers import PerceiverForOpticalFlow
 >>> import torch
->>> model = PerceiverForOpticalFlow.from_pretrained('deepmind/optical-flow-perceiver')
+>>> model = PerceiverForOpticalFlow.from_pretrained("deepmind/optical-flow-perceiver")
 >>> # in the Perceiver IO paper, the authors extract a 3 x 3 patch around each pixel,
 >>> # leading to 3 x 3 x 3 = 27 values for each pixel (as each pixel also has 3 color channels)
@@ -1894,7 +1902,7 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
 >>> audio = torch.randn((1, 30720, 1))
 >>> inputs = dict(image=images, audio=audio, label=torch.zeros((images.shape[0], 700)))
->>> model = PerceiverForMultimodalAutoencoding.from_pretrained('deepmind/multimodal-perceiver')
+>>> model = PerceiverForMultimodalAutoencoding.from_pretrained("deepmind/multimodal-perceiver")
 >>> # in the Perceiver IO paper, videos are auto-encoded in chunks
 >>> # each chunk subsamples different index dimensions of the image and audio modality decoder queries
@@ -1904,9 +1912,9 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
 >>> # process the first chunk
 >>> chunk_idx = 0
 >>> subsampling = {
 ...     "image": torch.arange(image_chunk_size * chunk_idx, image_chunk_size * (chunk_idx + 1)),
 ...     "audio": torch.arange(audio_chunk_size * chunk_idx, audio_chunk_size * (chunk_idx + 1)),
 ...     "label": None,
 ... }
 >>> outputs = model(inputs=inputs, subsampled_output_points=subsampling)
@@ -1292,8 +1292,8 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
 >>> from transformers import ProphetNetTokenizer, ProphetNetEncoder
 >>> import torch
->>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone')
+>>> tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = ProphetNetEncoder.from_pretrained("patrickvonplaten/prophetnet-large-uncased-standalone")
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1469,8 +1469,8 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
 >>> from transformers import ProphetNetTokenizer, ProphetNetDecoder
 >>> import torch
->>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = ProphetNetDecoder.from_pretrained('microsoft/prophetnet-large-uncased', add_cross_attention=False)
+>>> tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = ProphetNetDecoder.from_pretrained("microsoft/prophetnet-large-uncased", add_cross_attention=False)
 >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1809,10 +1809,12 @@ class ProphetNetModel(ProphetNetPreTrainedModel):
 ```python
 >>> from transformers import ProphetNetTokenizer, ProphetNetModel
->>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = ProphetNetModel.from_pretrained('microsoft/prophetnet-large-uncased')
+>>> tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = ProphetNetModel.from_pretrained("microsoft/prophetnet-large-uncased")
->>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
+>>> input_ids = tokenizer(
+...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
+... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -1929,10 +1931,12 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel):
 ```python
 >>> from transformers import ProphetNetTokenizer, ProphetNetForConditionalGeneration
->>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = ProphetNetForConditionalGeneration.from_pretrained('microsoft/prophetnet-large-uncased')
+>>> tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = ProphetNetForConditionalGeneration.from_pretrained("microsoft/prophetnet-large-uncased")
->>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
+>>> input_ids = tokenizer(
+...     "Studies have been shown that owning a dog is good for you", return_tensors="pt"
+... ).input_ids # Batch size 1
 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
@@ -2173,8 +2177,8 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
 >>> from transformers import ProphetNetTokenizer, ProphetNetForCausalLM
 >>> import torch
->>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = ProphetNetForCausalLM.from_pretrained('microsoft/prophetnet-large-uncased')
+>>> tokenizer = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = ProphetNetForCausalLM.from_pretrained("microsoft/prophetnet-large-uncased")
 >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -2185,17 +2189,21 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
 >>> from transformers import BertTokenizer, EncoderDecoderModel, ProphetNetTokenizer
 >>> import torch
->>> tokenizer_enc = BertTokenizer.from_pretrained('bert-large-uncased')
->>> tokenizer_dec = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
->>> model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-large-uncased", "microsoft/prophetnet-large-uncased")
+>>> tokenizer_enc = BertTokenizer.from_pretrained("bert-large-uncased")
+>>> tokenizer_dec = ProphetNetTokenizer.from_pretrained("microsoft/prophetnet-large-uncased")
+>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained(
+...     "bert-large-uncased", "microsoft/prophetnet-large-uncased"
+... )
 >>> ARTICLE = (
 ...     "the us state department said wednesday it had received no "
 ...     "formal word from bolivia that it was expelling the us ambassador there "
 ...     "but said the charges made against him are `` baseless ."
 ... )
 >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
->>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
+>>> labels = tokenizer_dec(
+...     "us rejects charges against its ambassador in bolivia", return_tensors="pt"
+... ).input_ids
 >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])
 >>> loss = outputs.loss
@@ -1083,10 +1083,10 @@ class QDQBertLMHeadModel(QDQBertPreTrainedModel):
 >>> from transformers import BertTokenizer, QDQBertLMHeadModel, QDQBertConfig
 >>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
 >>> config = QDQBertConfig.from_pretrained("bert-base-cased")
 >>> config.is_decoder = True
->>> model = QDQBertLMHeadModel.from_pretrained('bert-base-cased', config=config)
+>>> model = QDQBertLMHeadModel.from_pretrained("bert-base-cased", config=config)
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -1310,16 +1310,16 @@ class QDQBertForNextSentencePrediction(QDQBertPreTrainedModel):
 >>> from transformers import BertTokenizer, QDQBertForNextSentencePrediction
 >>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
->>> model = QDQBertForNextSentencePrediction.from_pretrained('bert-base-uncased')
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+>>> model = QDQBertForNextSentencePrediction.from_pretrained("bert-base-uncased")
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
->>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
+>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
 >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
 >>> logits = outputs.logits
 >>> assert logits[0, 0] < logits[0, 1] # next sentence was random
 ```"""
 if "next_sentence_label" in kwargs:
@@ -303,8 +303,11 @@ class RagPreTrainedModel(PreTrainedModel):
 ```python
 >>> from transformers import RagModel
 >>> # initialize a RAG from two pretrained models.
->>> model = RagModel.from_question_encoder_generator_pretrained('facebook/dpr-question_encoder-single-nq-base', 't5-small')
+>>> model = RagModel.from_question_encoder_generator_pretrained(
+...     "facebook/dpr-question_encoder-single-nq-base", "t5-small"
+... )
 >>> # saving model after fine-tuning
 >>> model.save_pretrained("./rag")
 >>> # load fine-tuned model
@@ -560,7 +563,9 @@ class RagModel(RagPreTrainedModel):
 >>> import torch
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-token-base", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
 >>> model = RagModel.from_pretrained("facebook/rag-token-base", retriever=retriever)
@@ -801,13 +806,15 @@ class RagSequenceForGeneration(RagPreTrainedModel):
 >>> import torch
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
 >>> model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever)
 >>> inputs = tokenizer("How many people live in Paris?", return_tensors="pt")
 >>> with tokenizer.as_target_tokenizer():
 ...     targets = tokenizer("In Paris, there are 10 million people.", return_tensors="pt")
 >>> input_ids = inputs["input_ids"]
 >>> labels = targets["input_ids"]
 >>> outputs = model(input_ids=input_ids, labels=labels)
@@ -818,9 +825,16 @@ class RagSequenceForGeneration(RagPreTrainedModel):
 >>> question_hidden_states = model.question_encoder(input_ids)[0]
 >>> # 2. Retrieve
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.detach().numpy(), return_tensors="pt")
->>> doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)).squeeze(1)
+>>> doc_scores = torch.bmm(
+...     question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)
+... ).squeeze(1)
 >>> # 3. Forward to generator
->>> outputs = model(context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores, decoder_input_ids=labels)
+>>> outputs = model(
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+...     decoder_input_ids=labels,
+... )
 ```"""
 n_docs = n_docs if n_docs is not None else self.config.n_docs
 exclude_bos_score = exclude_bos_score if exclude_bos_score is not None else self.config.exclude_bos_score
@@ -1259,13 +1273,15 @@ class RagTokenForGeneration(RagPreTrainedModel):
 >>> import torch
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
 >>> model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever)
 >>> inputs = tokenizer("How many people live in Paris?", return_tensors="pt")
 >>> with tokenizer.as_target_tokenizer():
 ...     targets = tokenizer("In Paris, there are 10 million people.", return_tensors="pt")
 >>> input_ids = inputs["input_ids"]
 >>> labels = targets["input_ids"]
 >>> outputs = model(input_ids=input_ids, labels=labels)
@@ -1276,12 +1292,23 @@ class RagTokenForGeneration(RagPreTrainedModel):
 >>> question_hidden_states = model.question_encoder(input_ids)[0]
 >>> # 2. Retrieve
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.detach().numpy(), return_tensors="pt")
->>> doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)).squeeze(1)
+>>> doc_scores = torch.bmm(
+...     question_hidden_states.unsqueeze(1), docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)
+... ).squeeze(1)
 >>> # 3. Forward to generator
->>> outputs = model(context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores, decoder_input_ids=labels)
+>>> outputs = model(
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+...     decoder_input_ids=labels,
+... )
 >>> # or directly generate
->>> generated = model.generate(context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores)
+>>> generated = model.generate(
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+... )
 >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True)
 ```"""
 n_docs = n_docs if n_docs is not None else self.config.n_docs
@@ -276,10 +276,18 @@ class TFRagPreTrainedModel(TFPreTrainedModel):
 ```python
 >>> from transformers import RagRetriever, TFRagModel
 >>> # initialize a RAG from two pretrained models.
->>> model = TFRagModel.from_pretrained_question_encoder_generator('facebook/dpr-question_encoder-single-nq-base', 't5-small')
+>>> model = TFRagModel.from_pretrained_question_encoder_generator(
+...     "facebook/dpr-question_encoder-single-nq-base", "t5-small"
+... )
 >>> # alternatively, initialize from pytorch pretrained models can also be done
->>> model = TFRagModel.from_pretrained_question_encoder_generator('facebook/dpr-question_encoder-single-nq-base', "facebook/bart-base", generator_from_pt=True, question_encoder_from_pt=True)
+>>> model = TFRagModel.from_pretrained_question_encoder_generator(
+...     "facebook/dpr-question_encoder-single-nq-base",
+...     "facebook/bart-base",
+...     generator_from_pt=True,
+...     question_encoder_from_pt=True,
+... )
 >>> # saving model after fine-tuning
 >>> model.save_pretrained("./rag")
@@ -555,11 +563,15 @@ class TFRagModel(TFRagPreTrainedModel):
 >>> import torch
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-token-base", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
 >>> model = TFRagModel.from_pretrained("facebook/rag-token-base", retriever=retriever, from_pt=True)
->>> input_dict = tokenizer.prepare_seq2seq_batch("How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf")
+>>> input_dict = tokenizer.prepare_seq2seq_batch(
+...     "How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf"
+... )
 >>> input_ids = input_dict["input_ids"]
 >>> outputs = model(input_ids)
 ```"""
@@ -930,11 +942,15 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
 >>> from transformers import RagTokenizer, RagRetriever, TFRagTokenForGeneration
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
 >>> model = TFRagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever, from_pt=True)
->>> input_dict = tokenizer.prepare_seq2seq_batch("How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf")
+>>> input_dict = tokenizer.prepare_seq2seq_batch(
+...     "How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf"
+... )
 >>> outputs = model(input_dict, output_retrieved=True)
 >>> # or use retriever separately
@@ -943,12 +959,27 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
 >>> question_hidden_states = model.question_encoder(input_ids)[0]
 >>> # 2. Retrieve
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.numpy(), return_tensors="tf")
->>> doc_scores = tf.squeeze(tf.matmul(tf.expand_dims(question_hidden_states, axis=1), docs_dict["retrieved_doc_embeds"], transpose_b=True), axis=1)
+>>> doc_scores = tf.squeeze(
+...     tf.matmul(
+...         tf.expand_dims(question_hidden_states, axis=1), docs_dict["retrieved_doc_embeds"], transpose_b=True
+...     ),
+...     axis=1,
+... )
 >>> # 3. Forward to generator
->>> outputs = model(inputs=None, context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores, decoder_input_ids=input_dict["labels"])
+>>> outputs = model(
+...     inputs=None,
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+...     decoder_input_ids=input_dict["labels"],
+... )
 >>> # or directly generate
->>> generated = model.generate(context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores)
+>>> generated = model.generate(
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+... )
 >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True)
 ```"""
@@ -1519,11 +1550,17 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
 >>> from transformers import RagTokenizer, RagRetriever, TFRagSequenceForGeneration
 >>> tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
->>> retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True
+... )
 >>> # initialize with RagRetriever to do everything in one forward call
->>> model = TFRagRagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever, from_pt=True)
+>>> model = TFRagSequenceForGeneration.from_pretrained(
+...     "facebook/rag-sequence-nq", retriever=retriever, from_pt=True
+... )
->>> input_dict = tokenizer.prepare_seq2seq_batch("How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf")
+>>> input_dict = tokenizer.prepare_seq2seq_batch(
+...     "How many people live in Paris?", "In Paris, there are 10 million people.", return_tensors="tf"
+... )
 >>> outputs = model(input_dict, output_retrieved=True)
 >>> # or use retriever separately
@@ -1532,12 +1569,27 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
 >>> question_hidden_states = model.question_encoder(input_ids)[0]
 >>> # 2. Retrieve
 >>> docs_dict = retriever(input_ids.numpy(), question_hidden_states.numpy(), return_tensors="tf")
->>> doc_scores = tf.squeeze(tf.matmul(tf.expand_dims(question_hidden_states, axis=1), docs_dict["retrieved_doc_embeds"], transpose_b=True), axis=1)
+>>> doc_scores = tf.squeeze(
+...     tf.matmul(
+...         tf.expand_dims(question_hidden_states, axis=1), docs_dict["retrieved_doc_embeds"], transpose_b=True
+...     ),
+...     axis=1,
+... )
 >>> # 3. Forward to generator
->>> outputs = model(inputs=None, context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores, decoder_input_ids=input_dict["labels"])
+>>> outputs = model(
+...     inputs=None,
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+...     decoder_input_ids=input_dict["labels"],
+... )
 >>> # or directly generate
->>> generated = model.generate(context_input_ids=docs_dict["context_input_ids"], context_attention_mask=docs_dict["context_attention_mask"], doc_scores=doc_scores)
+>>> generated = model.generate(
+...     context_input_ids=docs_dict["context_input_ids"],
+...     context_attention_mask=docs_dict["context_attention_mask"],
+...     doc_scores=doc_scores,
+... )
 >>> generated_string = tokenizer.batch_decode(generated, skip_special_tokens=True)
 ```"""
@@ -345,22 +345,35 @@ class RagRetriever:
 ```python
 >>> # To load the default "wiki_dpr" dataset with 21M passages from wikipedia (index name is 'compressed' or 'exact')
 >>> from transformers import RagRetriever
->>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', dataset="wiki_dpr", index_name='compressed')
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/dpr-ctx_encoder-single-nq-base", dataset="wiki_dpr", index_name="compressed"
+... )
 >>> # To load your own indexed dataset built with the datasets library. More info on how to build the indexed dataset in examples/rag/use_own_knowledge_dataset.py
 >>> from transformers import RagRetriever
->>> dataset = ... # dataset must be a datasets.Datasets object with columns "title", "text" and "embeddings", and it must have a faiss index
->>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', indexed_dataset=dataset)
+>>> dataset = (
+...     ...
+... ) # dataset must be a datasets.Datasets object with columns "title", "text" and "embeddings", and it must have a faiss index
+>>> retriever = RagRetriever.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", indexed_dataset=dataset)
 >>> # To load your own indexed dataset built with the datasets library that was saved on disk. More info in examples/rag/use_own_knowledge_dataset.py
 >>> from transformers import RagRetriever
 >>> dataset_path = "path/to/my/dataset" # dataset saved via *dataset.save_to_disk(...)*
 >>> index_path = "path/to/my/index.faiss" # faiss index saved via *dataset.get_index("embeddings").save(...)*
->>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', index_name='custom', passages_path=dataset_path, index_path=index_path)
+>>> retriever = RagRetriever.from_pretrained(
+...     "facebook/dpr-ctx_encoder-single-nq-base",
+...     index_name="custom",
+...     passages_path=dataset_path,
+...     index_path=index_path,
+... )
 >>> # To load the legacy index built originally for Rag's paper
 >>> from transformers import RagRetriever
->>> retriever = RagRetriever.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', index_name='legacy')
+>>> retriever = RagRetriever.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", index_name="legacy")
 ```"""
 def __init__(self, config, question_encoder_tokenizer, generator_tokenizer, index=None, init_retrieval=True):
@@ -1085,10 +1085,10 @@ class RemBertForCausalLM(RemBertPreTrainedModel):
 >>> from transformers import RemBertTokenizer, RemBertForCausalLM, RemBertConfig
 >>> import torch
->>> tokenizer = RemBertTokenizer.from_pretrained('google/rembert')
+>>> tokenizer = RemBertTokenizer.from_pretrained("google/rembert")
 >>> config = RemBertConfig.from_pretrained("google/rembert")
 >>> config.is_decoder = True
->>> model = RemBertForCausalLM.from_pretrained('google/rembert', config=config)
+>>> model = RemBertForCausalLM.from_pretrained("google/rembert", config=config)
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)