Unverified commit b5e2b183, authored by Sylvain Gugger, committed by GitHub

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
parent e13f72fb
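
The bullets above describe the mechanics: `style_doc` now pipes each code example through black, then restores the doctest prompts, putting `>>>` on the first nonempty line of a statement and `...` on its continuation lines. A minimal sketch of the idea (this is not the actual `utils/style_doc.py` implementation; the prompt heuristic and the 119-character line length are simplifying assumptions):

```python
import black


def blacken_example(code: str, line_length: int = 119) -> str:
    """Re-style one doctest example whose `>>> `/`... ` prompts were already stripped."""
    formatted = black.format_str(code, mode=black.Mode(line_length=line_length))
    restyled = []
    for line in formatted.strip().splitlines():
        if line and line[0] not in " )]}":
            # a top-level, nonempty line starts a new statement: primary prompt
            restyled.append(">>> " + line)
        else:
            # indented bodies and closing brackets are continuation lines
            restyled.append(("... " + line).rstrip())
    return "\n".join(restyled)
```

Applied to the over-long one-liners below, this is what turns a single `unsqueeze(0)` call into the three-line `(` / `...     0` / `... )` form that appears in several hunks.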
@@ -1064,16 +1064,16 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
>>> from transformers import DistilBertTokenizer, DistilBertForMultipleChoice
>>> import torch
->>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
->>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')
+>>> tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
+>>> model = DistilBertForMultipleChoice.from_pretrained("distilbert-base-cased")
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> choice0 = "It is eaten with a fork and a knife."
>>> choice1 = "It is eaten while held in the hand."
>>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
->>> encoding = tokenizer([[prompt, choice0], [prompt, choice1]], return_tensors='pt', padding=True)
->>> outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
+>>> encoding = tokenizer([[prompt, choice0], [prompt, choice1]], return_tensors="pt", padding=True)
+>>> outputs = model(**{k: v.unsqueeze(0) for k, v in encoding.items()}, labels=labels)  # batch size is 1
>>> # the linear classifier still needs to be trained
>>> loss = outputs.loss
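
The dict comprehension in this example does the batching: the tokenizer returns one row per `(prompt, choice)` pair, and multiple-choice models expect inputs of shape `(batch_size, num_choices, sequence_length)`. A sketch of the same reshape written out explicitly (shapes assume the two choices above):

```python
>>> input_ids = encoding["input_ids"]  # shape [2, seq_len]: one row per choice
>>> input_ids = input_ids.unsqueeze(0)  # shape [1, 2, seq_len]: (batch, choices, length)
```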
@@ -468,9 +468,10 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
```python
>>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
->>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
->>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
->>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
+>>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
+>>> model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
+>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```"""
@@ -548,9 +549,10 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
```python
>>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
->>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
->>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
->>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
+>>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
+>>> model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
+>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -627,14 +629,15 @@ class DPRReader(DPRPretrainedReader):
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
->>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
->>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
+>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
+>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
-... questions=["What is love ?"],
-... titles=["Haddaway"],
-... texts=["'What Is Love' is a song recorded by the artist Haddaway"],
-... return_tensors='pt'
-... )
+...     questions=["What is love ?"],
+...     titles=["Haddaway"],
+...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
+...     return_tensors="pt",
+... )
>>> outputs = model(**encoded_inputs)
>>> start_logits = outputs.start_logits
>>> end_logits = outputs.end_logits
@@ -615,9 +615,10 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
```python
>>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer
->>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
->>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', from_pt=True)
->>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
+>>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
+>>> model = TFDPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", from_pt=True)
+>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="tf")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -715,9 +716,10 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
```python
>>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer
->>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
->>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', from_pt=True)
->>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
+>>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
+>>> model = TFDPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", from_pt=True)
+>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="tf")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -813,14 +815,15 @@ class TFDPRReader(TFDPRPretrainedReader):
```python
>>> from transformers import TFDPRReader, DPRReaderTokenizer
->>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
->>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', from_pt=True)
+>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
+>>> model = TFDPRReader.from_pretrained("facebook/dpr-reader-single-nq-base", from_pt=True)
>>> encoded_inputs = tokenizer(
-... questions=["What is love ?"],
-... titles=["Haddaway"],
-... texts=["'What Is Love' is a song recorded by the artist Haddaway"],
-... return_tensors='tf'
-... )
+...     questions=["What is love ?"],
+...     titles=["Haddaway"],
+...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
+...     return_tensors="tf",
+... )
>>> outputs = model(encoded_inputs)
>>> start_logits = outputs.start_logits
>>> end_logits = outputs.end_logits
@@ -280,14 +280,15 @@ class CustomDPRReaderTokenizerMixin:
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
->>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
->>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
+>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
+>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
-... questions=["What is love ?"],
-... titles=["Haddaway"],
-... texts=["'What Is Love' is a song recorded by the artist Haddaway"],
-... return_tensors='pt'
-... )
+...     questions=["What is love ?"],
+...     titles=["Haddaway"],
+...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
+...     return_tensors="pt",
+... )
>>> outputs = model(**encoded_inputs)
>>> predicted_spans = tokenizer.decode_best_spans(encoded_inputs, outputs)
>>> print(predicted_spans[0].text) # best span
@@ -281,14 +281,15 @@ class CustomDPRReaderTokenizerMixin:
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
->>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
->>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
+>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
+>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
-... questions=["What is love ?"],
-... titles=["Haddaway"],
-... texts=["'What Is Love' is a song recorded by the artist Haddaway"],
-... return_tensors='pt'
-... )
+...     questions=["What is love ?"],
+...     titles=["Haddaway"],
+...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
+...     return_tensors="pt",
+... )
>>> outputs = model(**encoded_inputs)
>>> predicted_spans = tokenizer.decode_best_spans(encoded_inputs, outputs)
>>> print(predicted_spans[0].text) # best span
@@ -1095,10 +1095,12 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
>>> from transformers import ElectraTokenizer, ElectraForPreTraining
>>> import torch
->>> tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
->>> model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
+>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
+>>> model = ElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
->>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
+>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
+...     0
+... )  # Batch size 1
>>> logits = model(input_ids).logits
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -821,8 +821,8 @@ FLAX_ELECTRA_FOR_PRETRAINING_DOCSTRING = """
```python
>>> from transformers import ElectraTokenizer, FlaxElectraForPreTraining
->>> tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
->>> model = FlaxElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
+>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
+>>> model = FlaxElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="np")
>>> outputs = model(**inputs)
@@ -1088,8 +1088,8 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
>>> import tensorflow as tf
>>> from transformers import ElectraTokenizer, TFElectraForPreTraining
->>> tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
->>> model = TFElectraForPreTraining.from_pretrained('google/electra-small-discriminator')
+>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
+>>> model = TFElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :] # Batch size 1
>>> outputs = model(input_ids)
>>> scores = outputs[0]
@@ -57,17 +57,17 @@ class EncoderDecoderConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> config_encoder = model.config.encoder
>>> config_decoder = model.config.decoder
>>> # set decoder config to causal lm
>>> config_decoder.is_decoder = True
>>> config_decoder.add_cross_attention = True
>>> # Saving the model, including its configuration
->>> model.save_pretrained('my-model')
+>>> model.save_pretrained("my-model")
>>> # loading model and config from pretrained folder
->>> encoder_decoder_config = EncoderDecoderConfig.from_pretrained('my-model')
->>> model = EncoderDecoderModel.from_pretrained('my-model', config=encoder_decoder_config)
+>>> encoder_decoder_config = EncoderDecoderConfig.from_pretrained("my-model")
+>>> model = EncoderDecoderModel.from_pretrained("my-model", config=encoder_decoder_config)
```"""
model_type = "encoder-decoder"
is_composition = True
@@ -336,8 +336,9 @@ class EncoderDecoderModel(PreTrainedModel):
```python
>>> from transformers import EncoderDecoderModel
>>> # initialize a bert2bert from two pretrained BERT models. Note that the cross-attention layers will be randomly initialized
->>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased')
+>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2bert")
>>> # load fine-tuned model
@@ -448,8 +449,10 @@ class EncoderDecoderModel(PreTrainedModel):
>>> from transformers import EncoderDecoderModel, BertTokenizer
>>> import torch
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
->>> model = EncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'bert-base-uncased') # initialize Bert2Bert from pre-trained checkpoints
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained(
+...     "bert-base-uncased", "bert-base-uncased"
+... )  # initialize Bert2Bert from pre-trained checkpoints
>>> # training
>>> model.config.decoder_start_token_id = tokenizer.cls_token_id
@@ -431,12 +431,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> from transformers import FlaxEncoderDecoderModel, BertTokenizer
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-cased', 'gpt2')
+>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> text = "My friends are cool but they eat too many carbs."
->>> input_ids = tokenizer.encode(text, return_tensors='np')
+>>> input_ids = tokenizer.encode(text, return_tensors="np")
>>> encoder_outputs = model.encode(input_ids)
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -509,12 +509,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> import jax.numpy as jnp
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-cased', 'gpt2')
+>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> text = "My friends are cool but they eat too many carbs."
->>> input_ids = tokenizer.encode(text, max_length=1024, return_tensors='np')
+>>> input_ids = tokenizer.encode(text, max_length=1024, return_tensors="np")
>>> encoder_outputs = model.encode(input_ids)
>>> decoder_start_token_id = model.config.decoder.bos_token_id
@@ -636,15 +636,15 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> # load a fine-tuned bert2gpt2 model
>>> model = FlaxEncoderDecoderModel.from_pretrained("patrickvonplaten/bert2gpt2-cnn_dailymail-fp16")
>>> # load input & output tokenizer
->>> tokenizer_input = BertTokenizer.from_pretrained('bert-base-cased')
->>> tokenizer_output = GPT2Tokenizer.from_pretrained('gpt2')
+>>> tokenizer_input = BertTokenizer.from_pretrained("bert-base-cased")
+>>> tokenizer_output = GPT2Tokenizer.from_pretrained("gpt2")
>>> article = '''Sigma Alpha Epsilon is under fire for a video showing party-bound fraternity members
... singing a racist chant. SAE's national chapter suspended the students,
... but University of Oklahoma President David Boren took it a step further,
... saying the university's affiliation with the fraternity is permanently done.'''
->>> input_ids = tokenizer_input(article, add_special_tokens=True, return_tensors='np').input_ids
+>>> input_ids = tokenizer_input(article, add_special_tokens=True, return_tensors="np").input_ids
>>> # use GPT2's eos_token as the pad as well as eos token
>>> model.config.eos_token_id = model.config.decoder.eos_token_id
@@ -654,7 +654,8 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> summary = tokenizer_output.batch_decode(sequences, skip_special_tokens=True)[0]
>>> assert summary == "SAS Alpha Epsilon suspended Sigma Alpha Epsilon members"
-```"""
+```
+"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -781,8 +782,9 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
```python
>>> from transformers import FlaxEncoderDecoderModel
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-cased', 'gpt2')
+>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2gpt2")
>>> # load fine-tuned model
@@ -280,6 +280,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
```python
>>> from transformers import TFEncoderDecoderModel
>>> model = TFEncoderDecoderModel.from_pretrained("ydshieh/bert2bert-cnn_dailymail-fp16")
```"""
@@ -347,8 +348,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
```python
>>> from transformers import TFEncoderDecoderModel
>>> # initialize a bert2gpt2 from two pretrained BERT models. Note that the cross-attention layers will be randomly initialized
->>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-uncased', 'gpt2')
+>>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "gpt2")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2gpt2")
>>> # load fine-tuned model
@@ -486,12 +488,14 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
>>> from transformers import TFEncoderDecoderModel, BertTokenizer
>>> # initialize a bert2gpt2 from a pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained('bert-base-cased', 'gpt2')
+>>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
->>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
+>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> # forward
->>> input_ids = tokenizer.encode("Hello, my dog is cute", add_special_tokens=True, return_tensors='tf') # Batch size 1
+>>> input_ids = tokenizer.encode(
+...     "Hello, my dog is cute", add_special_tokens=True, return_tensors="tf"
+... )  # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
>>> # training
@@ -674,8 +674,9 @@ class FNetForPreTraining(FNetPreTrainedModel):
```python
>>> from transformers import FNetTokenizer, FNetForPreTraining
>>> import torch
->>> tokenizer = FNetTokenizer.from_pretrained('google/fnet-base')
->>> model = FNetForPreTraining.from_pretrained('google/fnet-base')
+>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
+>>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
>>> prediction_logits = outputs.prediction_logits
@@ -822,14 +823,15 @@ class FNetForNextSentencePrediction(FNetPreTrainedModel):
```python
>>> from transformers import FNetTokenizer, FNetForNextSentencePrediction
>>> import torch
->>> tokenizer = FNetTokenizer.from_pretrained('google/fnet-base')
->>> model = FNetForNextSentencePrediction.from_pretrained('google/fnet-base')
+>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
+>>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
->>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
+>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
>>> outputs = model(**encoding, labels=torch.LongTensor([1]))
>>> logits = outputs.logits
->>> assert logits[0, 0] < logits[0, 1] # next sentence was random
+>>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
```"""
if "next_sentence_label" in kwargs:
@@ -275,7 +275,7 @@ class FNetTokenizer(PreTrainedTokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. An FNet sequence
pair mask has the following format:
-```python
+```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
| first sequence    | second sequence |
```
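
This mask is what the tokenizer returns as `token_type_ids` for a sequence pair; a small sketch of producing it (expected output omitted, since the number of 0s and 1s depends on the tokenized lengths):

```python
>>> from transformers import FNetTokenizer

>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
>>> encoding = tokenizer("first sequence", "second sequence")
>>> # 0s cover [CLS] + first sequence + [SEP]; 1s cover second sequence + [SEP]
>>> token_type_ids = encoding["token_type_ids"]
```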
@@ -120,7 +120,7 @@ class FSMTConfig(PretrainedConfig):
```python
>>> from transformers import FSMTConfig, FSMTModel
->>> config = FSMTConfig.from_pretrained('facebook/wmt19-en-ru')
+>>> config = FSMTConfig.from_pretrained("facebook/wmt19-en-ru")
>>> model = FSMTModel(config)
```"""
model_type = "fsmt"
@@ -1114,10 +1114,10 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
>>> from transformers import FunnelTokenizer, FunnelForPreTraining
>>> import torch
->>> tokenizer = FunnelTokenizer.from_pretrained('funnel-transformer/small')
->>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small')
+>>> tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
+>>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")
->>> inputs = tokenizer("Hello, my dog is cute", return_tensors= "pt")
+>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> logits = model(**inputs).logits
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1273,10 +1273,10 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
>>> from transformers import FunnelTokenizer, TFFunnelForPreTraining
>>> import tensorflow as tf
->>> tokenizer = TFFunnelTokenizer.from_pretrained('funnel-transformer/small')
->>> model = TFFunnelForPreTraining.from_pretrained('funnel-transformer/small')
+>>> tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
+>>> model = TFFunnelForPreTraining.from_pretrained("funnel-transformer/small")
->>> inputs = tokenizer("Hello, my dog is cute", return_tensors= "tf")
+>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> logits = model(inputs).logits
```"""
inputs = input_processing(
@@ -631,12 +631,13 @@ PARALLELIZE_DOCSTRING = r"""
```python
# Here is an example of a device map on a machine with 4 GPUs using gpt2-xl, which has a total of 48 attention modules:
-model = GPT2LMHeadModel.from_pretrained('gpt2-xl')
-device_map = {0: [0, 1, 2, 3, 4, 5, 6, 7, 8],
-              1: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
-              2: [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
-              3: [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]}
+model = GPT2LMHeadModel.from_pretrained("gpt2-xl")
+device_map = {
+    0: [0, 1, 2, 3, 4, 5, 6, 7, 8],
+    1: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
+    2: [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
+    3: [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
+}
model.parallelize(device_map)
```
"""
@@ -647,14 +648,15 @@ DEPARALLELIZE_DOCSTRING = r"""
```python
# On a 4 GPU machine with gpt2-large:
-model = GPT2LMHeadModel.from_pretrained('gpt2-large')
-device_map = {0: [0, 1, 2, 3, 4, 5, 6, 7],
-              1: [8, 9, 10, 11, 12, 13, 14, 15],
-              2: [16, 17, 18, 19, 20, 21, 22, 23],
-              3: [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]}
-model.parallelize(device_map) # Splits the model across several devices
-model.deparallelize() # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
+model = GPT2LMHeadModel.from_pretrained("gpt2-large")
+device_map = {
+    0: [0, 1, 2, 3, 4, 5, 6, 7],
+    1: [8, 9, 10, 11, 12, 13, 14, 15],
+    2: [16, 17, 18, 19, 20, 21, 22, 23],
+    3: [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
+}
+model.parallelize(device_map)  # Splits the model across several devices
+model.deparallelize()  # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
```
"""
@@ -1224,13 +1226,15 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
>>> import torch
>>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
->>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
->>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
+>>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+>>> model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
>>> # Add a [CLS] to the vocabulary (we should train it also!)
->>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
+>>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
->>> embedding_layer = model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
+>>> embedding_layer = model.resize_token_embeddings(
+...     len(tokenizer)
+... )  # Update the model embeddings with the new vocabulary size
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
@@ -1033,13 +1033,15 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
>>> import tensorflow as tf
>>> from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel
->>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
->>> model = TFGPT2DoubleHeadsModel.from_pretrained('gpt2')
+>>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+>>> model = TFGPT2DoubleHeadsModel.from_pretrained("gpt2")
>>> # Add a [CLS] to the vocabulary (we should train it also!)
->>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
+>>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
->>> embedding_layer = model.resize_token_embeddings(len(tokenizer)) # Update the model embeddings with the new vocabulary size
+>>> embedding_layer = model.resize_token_embeddings(
+...     len(tokenizer)
+... )  # Update the model embeddings with the new vocabulary size
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
@@ -412,11 +412,13 @@ PARALLELIZE_DOCSTRING = r"""
```python
# Here is an example of a device map on a machine with 4 GPUs using gpt-j-6B, which has a total of 28 attention modules:
-model = GPTJForCausalLM.from_pretrained('EleutherAI/gpt-j-6B')
-device_map = {0: [0, 1, 2, 3, 4, 5, 6],
-              1: [7, 8, 9, 10, 11, 12, 13],
-              2: [14, 15, 16, 17, 18, 19, 20],
-              3: [21, 22, 23, 24, 25, 26, 27]}
+model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+device_map = {
+    0: [0, 1, 2, 3, 4, 5, 6],
+    1: [7, 8, 9, 10, 11, 12, 13],
+    2: [14, 15, 16, 17, 18, 19, 20],
+    3: [21, 22, 23, 24, 25, 26, 27],
+}
model.parallelize(device_map)
```
"""
@@ -428,13 +430,15 @@ DEPARALLELIZE_DOCSTRING = r"""
```python
# On a 4 GPU machine with gpt-j-6B:
-model = GPTJForCausalLM.from_pretrained('EleutherAI/gpt-j-6B')
-device_map = {0: [0, 1, 2, 3, 4, 5, 6],
-              1: [7, 8, 9, 10, 11, 12, 13],
-              2: [14, 15, 16, 17, 18, 19, 20],
-              3: [21, 22, 23, 24, 25, 26, 27]}
-model.parallelize(device_map) # Splits the model across several devices
-model.deparallelize() # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
+model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+device_map = {
+    0: [0, 1, 2, 3, 4, 5, 6],
+    1: [7, 8, 9, 10, 11, 12, 13],
+    2: [14, 15, 16, 17, 18, 19, 20],
+    3: [21, 22, 23, 24, 25, 26, 27],
+}
+model.parallelize(device_map)  # Splits the model across several devices
+model.deparallelize()  # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
```
"""