Unverified commit b5e2b183 authored by Sylvain Gugger, committed by GitHub
Browse files

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
parent e13f72fb
......@@ -956,10 +956,10 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
>>> from transformers import RobertaTokenizer, RobertaForCausalLM, RobertaConfig
>>> import torch
>>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
>>> tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
>>> config = RobertaConfig.from_pretrained("roberta-base")
>>> config.is_decoder = True
>>> model = RobertaForCausalLM.from_pretrained('roberta-base', config=config)
>>> model = RobertaForCausalLM.from_pretrained("roberta-base", config=config)
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
......
......@@ -1131,10 +1131,10 @@ class RoFormerForCausalLM(RoFormerPreTrainedModel):
>>> from transformers import RoFormerTokenizer, RoFormerForCausalLM, RoFormerConfig
>>> import torch
>>> tokenizer = RoFormerTokenizer.from_pretrained('junnyu/roformer_chinese_base')
>>> tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
>>> config = RoFormerConfig.from_pretrained("junnyu/roformer_chinese_base")
>>> config.is_decoder = True
>>> model = RoFormerForCausalLM.from_pretrained('junnyu/roformer_chinese_base', config=config)
>>> model = RoFormerForCausalLM.from_pretrained("junnyu/roformer_chinese_base", config=config)
>>> inputs = tokenizer("今天天气非常好。", return_tensors="pt")
>>> outputs = model(**inputs)
......
......@@ -103,7 +103,8 @@ class RoFormerTokenizer(PreTrainedTokenizer):
```python
>>> from transformers import RoFormerTokenizer
>>> tokenizer = RoFormerTokenizer.from_pretrained('junnyu/roformer_chinese_base')
>>> tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
>>> tokenizer.tokenize("今天天气非常好。")
# ['今', '天', '天', '气', '非常', '好', '。']
```"""
......
......@@ -74,7 +74,8 @@ class RoFormerTokenizerFast(PreTrainedTokenizerFast):
```python
>>> from transformers import RoFormerTokenizerFast
>>> tokenizer = RoFormerTokenizerFast.from_pretrained('junnyu/roformer_chinese_base')
>>> tokenizer = RoFormerTokenizerFast.from_pretrained("junnyu/roformer_chinese_base")
>>> tokenizer.tokenize("今天天气非常好。")
# ['今', '天', '天', '气', '非常', '好', '。']
```"""
......
......@@ -493,7 +493,7 @@ class SegformerModel(SegformerPreTrainedModel):
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
>>> model = SegformerModel("nvidia/segformer-b0-finetuned-ade-512-512")
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(images=image, return_tensors="pt")
......@@ -570,11 +570,11 @@ class SegformerForImageClassification(SegformerPreTrainedModel):
>>> from PIL import Image
>>> import requests
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained('nvidia/mit-b0')
>>> model = SegformerForImageClassification.from_pretrained('nvidia/mit-b0')
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/mit-b0")
>>> model = SegformerForImageClassification.from_pretrained("nvidia/mit-b0")
>>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
......@@ -729,7 +729,7 @@ class SegformerForSemanticSegmentation(SegformerPreTrainedModel):
>>> feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
>>> model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(images=image, return_tensors="pt")
......
......@@ -480,10 +480,10 @@ class XSoftmax(torch.autograd.Function):
>>> from transformers.models.deberta_v2.modeling_deberta_v2 import XSoftmax
>>> # Make a tensor
>>> x = torch.randn([4,20,100])
>>> x = torch.randn([4, 20, 100])
>>> # Create a mask
>>> mask = (x>0).int()
>>> mask = (x > 0).int()
>>> # Specify the dimension to apply softmax
>>> dim = -1
......
......@@ -64,11 +64,11 @@ class SpeechEncoderDecoderConfig(PretrainedConfig):
>>> config_decoder.add_cross_attention = True
>>> # Saving the model, including its configuration
>>> model.save_pretrained('my-model')
>>> model.save_pretrained("my-model")
>>> # loading model and config from pretrained folder
>>> encoder_decoder_config = SpeechEncoderDecoderConfig.from_pretrained('my-model')
>>> model = SpeechEncoderDecoderModel.from_pretrained('my-model', config=encoder_decoder_config)
>>> encoder_decoder_config = SpeechEncoderDecoderConfig.from_pretrained("my-model")
>>> model = SpeechEncoderDecoderModel.from_pretrained("my-model", config=encoder_decoder_config)
```"""
model_type = "speech-encoder-decoder"
is_composition = True
......
......@@ -330,8 +330,11 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
```python
>>> from transformers import SpeechEncoderDecoderModel
>>> # initialize a wav2vec2bert from a pretrained Wav2Vec2 and a pretrained BERT model. Note that the cross-attention layers will be randomly initialized
>>> model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained('facebook/wav2vec2-base-960h', 'bert-base-uncased')
>>> model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
... "facebook/wav2vec2-base-960h", "bert-base-uncased"
... )
>>> # saving model after fine-tuning
>>> model.save_pretrained("./wav2vec2bert")
>>> # load fine-tuned model
......@@ -447,8 +450,8 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
>>> from datasets import load_dataset
>>> import torch
>>> processor = Speech2Text2Processor.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
>>> model = SpeechEncoderDecoderModel.from_pretrained('facebook/s2t-wav2vec2-large-en-de')
>>> processor = Speech2Text2Processor.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
>>> model = SpeechEncoderDecoderModel.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
......
......@@ -1310,15 +1310,19 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
>>> model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
>>> processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
>>> def map_to_array(batch):
>>> speech, _ = sf.read(batch["file"])
>>> batch["speech"] = speech
>>> return batch
... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech
... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
>>> input_features = processor(
... ds["speech"][0], sampling_rate=16000, return_tensors="pt"
>>> ).input_features # Batch size 1
>>> generated_ids = model.generate(input_ids=input_features)
>>> transcription = processor.batch_decode(generated_ids)
......
......@@ -859,12 +859,18 @@ class Speech2Text2ForCausalLM(Speech2Text2PreTrainedModel):
Example:
```python
>>> from transformers import SpeechEncoderDecoderModel, Speech2Text2ForCausalLM, Wav2Vec2Model, Speech2Text2Config, Wav2Vec2Config
>>> from transformers import (
... SpeechEncoderDecoderModel,
... Speech2Text2ForCausalLM,
... Wav2Vec2Model,
... Speech2Text2Config,
... Wav2Vec2Config,
... )
>>> encoder = Wav2Vec2Model(Wav2Vec2Config())
>>> decoder = Speech2Text2ForCausalLM(Speech2Text2Config())
# init speech2text model
>>> model = SpeechEncoderDecoderModel(encoder=encoder, decoder=decoder)
```"""
......
......@@ -471,7 +471,7 @@ SQUEEZEBERT_START_DOCSTRING = r"""
Hierarchy:
```python
```
Internal class hierarchy:
SqueezeBertModel
SqueezeBertEncoder
......@@ -483,7 +483,7 @@ SQUEEZEBERT_START_DOCSTRING = r"""
Data layouts:
```python
```
Input data is in [batch, sequence_length, hidden_size] format.
Data inside the encoder is in [batch, hidden_size, sequence_length] format. But, if `output_hidden_states == True`, the data from inside the encoder is returned in [batch, sequence_length, hidden_size] format.
......
......@@ -1055,11 +1055,11 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel):
```python
>>> from transformers import T5Tokenizer, FlaxT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = FlaxT5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
>>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors='np')
>>> inputs = tokenizer(text, return_tensors="np")
>>> encoder_outputs = model.encode(**inputs)
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
......@@ -1117,11 +1117,11 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel):
>>> from transformers import T5Tokenizer, FlaxT5ForConditionalGeneration
>>> import jax.numpy as jnp
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = FlaxT5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
>>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors='np')
>>> inputs = tokenizer(text, return_tensors="np")
>>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id
......@@ -1333,10 +1333,12 @@ FLAX_T5_MODEL_DOCSTRING = """
```python
>>> from transformers import T5Tokenizer, FlaxT5Model
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = FlaxT5Model.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = FlaxT5Model.from_pretrained("t5-small")
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="np").input_ids
>>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="np"
>>> ).input_ids
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="np").input_ids
>>> # forward pass
......@@ -1483,11 +1485,11 @@ class FlaxT5ForConditionalGeneration(FlaxT5PreTrainedModel):
>>> from transformers import T5Tokenizer, FlaxT5ForConditionalGeneration
>>> import jax.numpy as jnp
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = FlaxT5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
>>> text = "summarize: My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors='np')
>>> inputs = tokenizer(text, return_tensors="np")
>>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id
......@@ -1631,14 +1633,14 @@ FLAX_T5_CONDITIONAL_GENERATION_DOCSTRING = """
```python
>>> from transformers import T5Tokenizer, FlaxT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = FlaxT5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = FlaxT5ForConditionalGeneration.from_pretrained("t5-small")
>>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors='np')
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np")
>>> # Generate Summary
>>> summary_ids = model.generate(inputs['input_ids']).sequences
>>> summary_ids = model.generate(inputs["input_ids"]).sequences
>>> print(tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False))
```
"""
......
......@@ -204,12 +204,13 @@ PARALLELIZE_DOCSTRING = r"""
```python
# Here is an example of a device map on a machine with 4 GPUs using t5-3b, which has a total of 24 attention modules:
model = T5ForConditionalGeneration.from_pretrained('t5-3b')
device_map = {0: [0, 1, 2],
model = T5ForConditionalGeneration.from_pretrained("t5-3b")
device_map = {
0: [0, 1, 2],
1: [3, 4, 5, 6, 7, 8, 9],
2: [10, 11, 12, 13, 14, 15, 16],
3: [17, 18, 19, 20, 21, 22, 23]}
3: [17, 18, 19, 20, 21, 22, 23],
}
model.parallelize(device_map)
```
"""
......@@ -220,12 +221,13 @@ DEPARALLELIZE_DOCSTRING = r"""
```python
# On a 4 GPU machine with t5-3b:
model = T5ForConditionalGeneration.from_pretrained('t5-3b')
device_map = {0: [0, 1, 2],
model = T5ForConditionalGeneration.from_pretrained("t5-3b")
device_map = {
0: [0, 1, 2],
1: [3, 4, 5, 6, 7, 8, 9],
2: [10, 11, 12, 13, 14, 15, 16],
3: [17, 18, 19, 20, 21, 22, 23]}
3: [17, 18, 19, 20, 21, 22, 23],
}
model.parallelize(device_map) # Splits the model across several devices
model.deparallelize() # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
```
......@@ -1344,10 +1346,12 @@ class T5Model(T5PreTrainedModel):
```python
>>> from transformers import T5Tokenizer, T5Model
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5Model.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5Model.from_pretrained("t5-small")
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
>>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
>>> ).input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> # forward pass
......@@ -1542,18 +1546,20 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
```python
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5ForConditionalGeneration.from_pretrained("t5-small")
>>> # training
>>> input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
>>> labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
>>> input_ids = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="pt").input_ids
>>> labels = tokenizer("<extra_id_0> cute dog <extra_id_1> the <extra_id_2>", return_tensors="pt").input_ids
>>> outputs = model(input_ids=input_ids, labels=labels)
>>> loss = outputs.loss
>>> logits = outputs.logits
>>> # inference
>>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
>>> input_ids = tokenizer(
... "summarize: studies have shown that owning a dog is good for you", return_tensors="pt"
>>> ).input_ids # Batch size 1
>>> outputs = model.generate(input_ids)
>>> print(tokenizer.decode(outputs[0], skip_special_tokens=True))
>>> # studies have shown that owning a dog is good for you.
......@@ -1796,9 +1802,12 @@ class T5EncoderModel(T5PreTrainedModel):
```python
>>> from transformers import T5Tokenizer, T5EncoderModel
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5EncoderModel.from_pretrained('t5-small')
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = T5EncoderModel.from_pretrained("t5-small")
>>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
>>> ).input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids)
>>> last_hidden_states = outputs.last_hidden_state
```"""
......
......@@ -1177,10 +1177,12 @@ class TFT5Model(TFT5PreTrainedModel):
```python
>>> from transformers import T5Tokenizer, TFT5Model
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = TFT5Model.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = TFT5Model.from_pretrained("t5-small")
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1
>>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="tf"
>>> ).input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1
>>> # forward pass
......@@ -1375,18 +1377,20 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
```python
>>> from transformers import T5Tokenizer, TFT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = TFT5ForConditionalGeneration.from_pretrained("t5-small")
>>> # training
>>> inputs = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='tf').input_ids
>>> labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='tf').input_ids
>>> inputs = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="tf").input_ids
>>> labels = tokenizer("<extra_id_0> cute dog <extra_id_1> the <extra_id_2>", return_tensors="tf").input_ids
>>> outputs = model(inputs, labels=labels)
>>> loss = outputs.loss
>>> logits = outputs.logits
>>> # inference
>>> inputs = tokenizer("summarize: studies have shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1
>>> inputs = tokenizer(
... "summarize: studies have shown that owning a dog is good for you", return_tensors="tf"
>>> ).input_ids # Batch size 1
>>> outputs = model.generate(inputs)
>>> print(tokenizer.decode(outputs[0], skip_special_tokens=True))
>>> # studies have shown that owning a dog is good for you
......@@ -1633,10 +1637,12 @@ class TFT5EncoderModel(TFT5PreTrainedModel):
```python
>>> from transformers import T5Tokenizer, TFT5EncoderModel
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = TFT5EncoderModel.from_pretrained('t5-small')
>>> tokenizer = T5Tokenizer.from_pretrained("t5-small")
>>> model = TFT5EncoderModel.from_pretrained("t5-small")
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1
>>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="tf"
>>> ).input_ids # Batch size 1
>>> outputs = model(input_ids)
```"""
inputs = input_processing(
......
......@@ -133,6 +133,7 @@ class TapasConfig(PretrainedConfig):
```python
>>> from transformers import TapasModel, TapasConfig
>>> # Initializing a default (SQA) Tapas configuration
>>> configuration = TapasConfig()
>>> # Initializing a model from the configuration
......
......@@ -916,12 +916,13 @@ class TapasModel(TapasPreTrainedModel):
>>> from transformers import TapasTokenizer, TapasModel
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base')
>>> model = TapasModel.from_pretrained('google/tapas-base')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base")
>>> model = TapasModel.from_pretrained("google/tapas-base")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["How many movies has George Clooney played in?", "How old is Brad Pitt?"]
......@@ -1056,17 +1057,22 @@ class TapasForMaskedLM(TapasPreTrainedModel):
>>> from transformers import TapasTokenizer, TapasForMaskedLM
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base')
>>> model = TapasForMaskedLM.from_pretrained('google/tapas-base')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base")
>>> model = TapasForMaskedLM.from_pretrained("google/tapas-base")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> inputs = tokenizer(table=table, queries="How many [MASK] has George [MASK] played in?", return_tensors="pt")
>>> labels = tokenizer(table=table, queries="How many movies has George Clooney played in?", return_tensors="pt")["input_ids"]
>>> inputs = tokenizer(
... table=table, queries="How many [MASK] has George [MASK] played in?", return_tensors="pt"
... )
>>> labels = tokenizer(
... table=table, queries="How many movies has George Clooney played in?", return_tensors="pt"
>>> )["input_ids"]
>>> outputs = model(**inputs, labels=labels)
>>> logits = outputs.logits
......@@ -1204,12 +1210,13 @@ class TapasForQuestionAnswering(TapasPreTrainedModel):
>>> from transformers import TapasTokenizer, TapasForQuestionAnswering
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base-finetuned-wtq')
>>> model = TapasForQuestionAnswering.from_pretrained('google/tapas-base-finetuned-wtq')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")
>>> model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["How many movies has George Clooney played in?", "How old is Brad Pitt?"]
......@@ -1501,15 +1508,19 @@ class TapasForSequenceClassification(TapasPreTrainedModel):
>>> import torch
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base-finetuned-tabfact')
>>> model = TapasForSequenceClassification.from_pretrained('google/tapas-base-finetuned-tabfact')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-tabfact")
>>> model = TapasForSequenceClassification.from_pretrained("google/tapas-base-finetuned-tabfact")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["There is only one actor who is 45 years old", "There are 3 actors which played in more than 60 movies"]
>>> queries = [
... "There is only one actor who is 45 years old",
... "There are 3 actors which played in more than 60 movies",
... ]
>>> inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")
>>> labels = torch.tensor([1, 0]) # 1 means entailed, 0 means refuted
......
......@@ -1004,12 +1004,13 @@ class TFTapasModel(TFTapasPreTrainedModel):
>>> from transformers import TapasTokenizer, TapasModel
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base')
>>> model = TapasModel.from_pretrained('google/tapas-base')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base")
>>> model = TapasModel.from_pretrained("google/tapas-base")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["How many movies has George Clooney played in?", "How old is Brad Pitt?"]
......@@ -1109,17 +1110,22 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss):
>>> from transformers import TapasTokenizer, TapasForMaskedLM
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base')
>>> model = TapasForMaskedLM.from_pretrained('google/tapas-base')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base")
>>> model = TapasForMaskedLM.from_pretrained("google/tapas-base")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> inputs = tokenizer(table=table, queries="How many [MASK] has George [MASK] played in?", return_tensors="tf")
>>> labels = tokenizer(table=table, queries="How many movies has George Clooney played in?", return_tensors="tf")["input_ids"]
>>> inputs = tokenizer(
... table=table, queries="How many [MASK] has George [MASK] played in?", return_tensors="tf"
... )
>>> labels = tokenizer(
... table=table, queries="How many movies has George Clooney played in?", return_tensors="tf"
>>> )["input_ids"]
>>> outputs = model(**inputs, labels=labels)
>>> logits = outputs.logits
......@@ -1359,12 +1365,13 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel):
>>> from transformers import TapasTokenizer, TapasForQuestionAnswering
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base-finetuned-wtq')
>>> model = TapasForQuestionAnswering.from_pretrained('google/tapas-base-finetuned-wtq')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")
>>> model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["How many movies has George Clooney played in?", "How old is Brad Pitt?"]
......@@ -1681,15 +1688,19 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif
>>> import tensorflow as tf
>>> import pandas as pd
>>> tokenizer = TapasTokenizer.from_pretrained('google/tapas-base-finetuned-tabfact')
>>> model = TapasForSequenceClassification.from_pretrained('google/tapas-base-finetuned-tabfact')
>>> tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-tabfact")
>>> model = TapasForSequenceClassification.from_pretrained("google/tapas-base-finetuned-tabfact")
>>> data = {'Actors': ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... 'Age': ["56", "45", "59"],
... 'Number of movies': ["87", "53", "69"]
>>> data = {
... "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
... "Age": ["56", "45", "59"],
... "Number of movies": ["87", "53", "69"],
... }
>>> table = pd.DataFrame.from_dict(data)
>>> queries = ["There is only one actor who is 45 years old", "There are 3 actors which played in more than 60 movies"]
>>> queries = [
... "There is only one actor who is 45 years old",
... "There are 3 actors which played in more than 60 movies",
... ]
>>> inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="tf")
>>> labels = tf.convert_to_tensor([1, 0]) # 1 means entailed, 0 means refuted
......
......@@ -896,8 +896,8 @@ class TrOCRForCausalLM(TrOCRPreTrainedModel):
>>> encoder = ViTModel(ViTConfig())
>>> decoder = TrOCRForCausalLM(TrOCRConfig())
# init vision2text model
>>> model = VisionEncoderDecoderModel(encoder=encoder, decoder=decoder)
```"""
......
......@@ -1270,9 +1270,7 @@ class UniSpeechForPreTraining(UniSpeechPreTrainedModel):
... outputs = model(input_values, mask_time_indices=mask_time_indices)
>>> # compute cosine similarity between predicted (=projected_states) and target (=projected_quantized_states)
>>> cosine_sim = torch.cosine_similarity(
... outputs.projected_states, outputs.projected_quantized_states, dim=-1
... )
>>> cosine_sim = torch.cosine_similarity(outputs.projected_states, outputs.projected_quantized_states, dim=-1)
>>> # show that cosine similarity is much higher than random
>>> assert cosine_sim[mask_time_indices].mean() > 0.5
......
......@@ -1303,9 +1303,7 @@ class UniSpeechSatForPreTraining(UniSpeechSatPreTrainedModel):
... outputs = model(input_values, mask_time_indices=mask_time_indices)
>>> # compute cosine similarity between predicted (=projected_states) and target (=projected_quantized_states)
>>> cosine_sim = torch.cosine_similarity(
... outputs.projected_states, outputs.projected_quantized_states, dim=-1
... )
>>> cosine_sim = torch.cosine_similarity(outputs.projected_states, outputs.projected_quantized_states, dim=-1)
>>> # show that cosine similarity is much higher than random
>>> assert cosine_sim[mask_time_indices].mean() > 0.5
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment