Unverified Commit b5e2b183 authored by Sylvain Gugger, committed by GitHub

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
parent e13f72fb
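The core idea behind "Add black formatting to style_doc" is simple: strip the doctest prompts from a code example, let black reformat the bare code (which is what turns single quotes into double quotes and reflows long calls in the hunks below), then put the prompts back. The snippet below is only a rough sketch of that idea; the helper name `blackify_doctest` and the prompt-restoring heuristic are made up for illustration, and the real doc-styling script in the repository handles many more cases (triple-quoted strings, blank lines, MDX files).

```python
# Rough sketch (illustrative only, not the actual style_doc implementation):
# strip ">>> " / "... " prompts, run black on the plain code, re-add prompts.
import black


def blackify_doctest(example: str, line_length: int = 119) -> str:
    # Drop the doctest prompts so black sees plain Python.
    code = "\n".join(line[4:] if line.startswith((">>> ", "... ")) else line for line in example.splitlines())
    formatted = black.format_str(code, mode=black.Mode(line_length=line_length))
    # Re-add prompts; treating indented or closing-bracket lines as continuations
    # is a crude heuristic, good enough for this sketch.
    styled = []
    for line in formatted.splitlines():
        prefix = "... " if line.startswith((" ", ")", "]", "}")) else ">>> "
        styled.append(prefix + line if line else line)
    return "\n".join(styled)


print(blackify_doctest(">>> x = tokenizer('hello', return_tensors='pt')"))
# >>> x = tokenizer("hello", return_tensors="pt")
```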
@@ -1064,16 +1064,16 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
>>> from transformers import DistilBertTokenizer, DistilBertForMultipleChoice
>>> import torch
>>> tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
>>> model = DistilBertForMultipleChoice.from_pretrained("distilbert-base-cased")
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> choice0 = "It is eaten with a fork and a knife."
>>> choice1 = "It is eaten while held in the hand."
>>> labels = torch.tensor(0).unsqueeze(0)  # choice0 is correct (according to Wikipedia ;)), batch size 1
>>> encoding = tokenizer([[prompt, choice0], [prompt, choice1]], return_tensors="pt", padding=True)
>>> outputs = model(**{k: v.unsqueeze(0) for k, v in encoding.items()}, labels=labels)  # batch size is 1
>>> # the linear classifier still needs to be trained
>>> loss = outputs.loss
@@ -468,9 +468,10 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
```python
>>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
>>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
>>> model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```"""
@@ -548,9 +549,10 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
```python
>>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
>>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
>>> model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -627,14 +629,15 @@ class DPRReader(DPRPretrainedReader):
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
...     questions=["What is love ?"],
...     titles=["Haddaway"],
...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
...     return_tensors="pt",
... )
>>> outputs = model(**encoded_inputs)
>>> start_logits = outputs.start_logits
>>> end_logits = outputs.end_logits
@@ -615,9 +615,10 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
```python
>>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer
>>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
>>> model = TFDPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base", from_pt=True)
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="tf")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -715,9 +716,10 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
```python
>>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer
>>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
>>> model = TFDPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", from_pt=True)
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="tf")["input_ids"]
>>> embeddings = model(input_ids).pooler_output
```
"""
@@ -813,14 +815,15 @@ class TFDPRReader(TFDPRPretrainedReader):
```python
>>> from transformers import TFDPRReader, DPRReaderTokenizer
>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> model = TFDPRReader.from_pretrained("facebook/dpr-reader-single-nq-base", from_pt=True)
>>> encoded_inputs = tokenizer(
...     questions=["What is love ?"],
...     titles=["Haddaway"],
...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
...     return_tensors="tf",
... )
>>> outputs = model(encoded_inputs)
>>> start_logits = outputs.start_logits
>>> end_logits = outputs.end_logits
@@ -280,14 +280,15 @@ class CustomDPRReaderTokenizerMixin:
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
...     questions=["What is love ?"],
...     titles=["Haddaway"],
...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
...     return_tensors="pt",
... )
>>> outputs = model(**encoded_inputs)
>>> predicted_spans = tokenizer.decode_best_spans(encoded_inputs, outputs)
>>> print(predicted_spans[0].text)  # best span
@@ -281,14 +281,15 @@ class CustomDPRReaderTokenizerMixin:
```python
>>> from transformers import DPRReader, DPRReaderTokenizer
>>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
>>> encoded_inputs = tokenizer(
...     questions=["What is love ?"],
...     titles=["Haddaway"],
...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
...     return_tensors="pt",
... )
>>> outputs = model(**encoded_inputs)
>>> predicted_spans = tokenizer.decode_best_spans(encoded_inputs, outputs)
>>> print(predicted_spans[0].text)  # best span
@@ -1095,10 +1095,12 @@ class ElectraForPreTraining(ElectraPreTrainedModel):
>>> from transformers import ElectraTokenizer, ElectraForPreTraining
>>> import torch
>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
>>> model = ElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(
...     0
... )  # Batch size 1
>>> logits = model(input_ids).logits
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -821,8 +821,8 @@ FLAX_ELECTRA_FOR_PRETRAINING_DOCSTRING = """
```python
>>> from transformers import ElectraTokenizer, FlaxElectraForPreTraining
>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
>>> model = FlaxElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="np")
>>> outputs = model(**inputs)
@@ -1088,8 +1088,8 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel):
>>> import tensorflow as tf
>>> from transformers import ElectraTokenizer, TFElectraForPreTraining
>>> tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
>>> model = TFElectraForPreTraining.from_pretrained("google/electra-small-discriminator")
>>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
>>> outputs = model(input_ids)
>>> scores = outputs[0]
@@ -57,17 +57,17 @@ class EncoderDecoderConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> config_encoder = model.config.encoder
>>> config_decoder = model.config.decoder
>>> # set decoder config to causal lm
>>> config_decoder.is_decoder = True
>>> config_decoder.add_cross_attention = True
>>> # Saving the model, including its configuration
>>> model.save_pretrained("my-model")
>>> # loading model and config from pretrained folder
>>> encoder_decoder_config = EncoderDecoderConfig.from_pretrained("my-model")
>>> model = EncoderDecoderModel.from_pretrained("my-model", config=encoder_decoder_config)
```"""
model_type = "encoder-decoder"
is_composition = True
@@ -336,8 +336,9 @@ class EncoderDecoderModel(PreTrainedModel):
```python
>>> from transformers import EncoderDecoderModel
>>> # initialize a bert2bert from two pretrained BERT models. Note that the cross-attention layers will be randomly initialized
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "bert-base-uncased")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2bert")
>>> # load fine-tuned model
@@ -448,8 +449,10 @@ class EncoderDecoderModel(PreTrainedModel):
>>> from transformers import EncoderDecoderModel, BertTokenizer
>>> import torch
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
>>> model = EncoderDecoderModel.from_encoder_decoder_pretrained(
...     "bert-base-uncased", "bert-base-uncased"
... )  # initialize Bert2Bert from pre-trained checkpoints
>>> # training
>>> model.config.decoder_start_token_id = tokenizer.cls_token_id
@@ -431,12 +431,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> from transformers import FlaxEncoderDecoderModel, BertTokenizer
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> text = "My friends are cool but they eat too many carbs."
>>> input_ids = tokenizer.encode(text, return_tensors="np")
>>> encoder_outputs = model.encode(input_ids)
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -509,12 +509,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> import jax.numpy as jnp
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> text = "My friends are cool but they eat too many carbs."
>>> input_ids = tokenizer.encode(text, max_length=1024, return_tensors="np")
>>> encoder_outputs = model.encode(input_ids)
>>> decoder_start_token_id = model.config.decoder.bos_token_id
@@ -636,15 +636,15 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> # load a fine-tuned bert2gpt2 model
>>> model = FlaxEncoderDecoderModel.from_pretrained("patrickvonplaten/bert2gpt2-cnn_dailymail-fp16")
>>> # load input & output tokenizer
>>> tokenizer_input = BertTokenizer.from_pretrained("bert-base-cased")
>>> tokenizer_output = GPT2Tokenizer.from_pretrained("gpt2")
>>> article = '''Sigma Alpha Epsilon is under fire for a video showing party-bound fraternity members
... singing a racist chant. SAE's national chapter suspended the students,
... but University of Oklahoma President David Boren took it a step further,
... saying the university's affiliation with the fraternity is permanently done.'''
>>> input_ids = tokenizer_input(article, add_special_tokens=True, return_tensors="np").input_ids
>>> # use GPT2's eos_token as the pad as well as eos token
>>> model.config.eos_token_id = model.config.decoder.eos_token_id
@@ -654,7 +654,8 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
>>> summary = tokenizer_output.batch_decode(sequences, skip_special_tokens=True)[0]
>>> assert summary == "SAS Alpha Epsilon suspended Sigma Alpha Epsilon members"
```
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@@ -781,8 +782,9 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel):
```python
>>> from transformers import FlaxEncoderDecoderModel
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
>>> model = FlaxEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2gpt2")
>>> # load fine-tuned model
@@ -280,6 +280,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
```python
>>> from transformers import TFEncoderDecoderModel
>>> model = TFEncoderDecoderModel.from_pretrained("ydshieh/bert2bert-cnn_dailymail-fp16")
```"""
@@ -347,8 +348,9 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
```python
>>> from transformers import TFEncoderDecoderModel
>>> # initialize a bert2gpt2 from a pretrained BERT and a pretrained GPT2 model. Note that the cross-attention layers will be randomly initialized
>>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-uncased", "gpt2")
>>> # saving model after fine-tuning
>>> model.save_pretrained("./bert2gpt2")
>>> # load fine-tuned model
@@ -486,12 +488,14 @@ class TFEncoderDecoderModel(TFPreTrainedModel):
>>> from transformers import TFEncoderDecoderModel, BertTokenizer
>>> # initialize a bert2gpt2 from pretrained BERT and GPT2 models. Note that the cross-attention layers will be randomly initialized
>>> model = TFEncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "gpt2")
>>> tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
>>> # forward
>>> input_ids = tokenizer.encode(
...     "Hello, my dog is cute", add_special_tokens=True, return_tensors="tf"
... )  # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
>>> # training
@@ -674,8 +674,9 @@ class FNetForPreTraining(FNetPreTrainedModel):
```python
>>> from transformers import FNetTokenizer, FNetForPreTraining
>>> import torch
>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
>>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
>>> prediction_logits = outputs.prediction_logits
@@ -822,14 +823,15 @@ class FNetForNextSentencePrediction(FNetPreTrainedModel):
```python
>>> from transformers import FNetTokenizer, FNetForNextSentencePrediction
>>> import torch
>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
>>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
>>> outputs = model(**encoding, labels=torch.LongTensor([1]))
>>> logits = outputs.logits
>>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
```"""
if "next_sentence_label" in kwargs:
@@ -275,7 +275,7 @@ class FNetTokenizer(PreTrainedTokenizer):
Create a mask from the two sequences passed to be used in a sequence-pair classification task. An FNet sequence
pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 | first sequence | second sequence |
```
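For context, the mask shown above is simply the `token_type_ids` the tokenizer returns when it encodes a sentence pair. A small usage sketch, assuming the `google/fnet-base` checkpoint used elsewhere in this diff:

```python
>>> from transformers import FNetTokenizer

>>> tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
>>> encoding = tokenizer("Sentence A", "Sentence B")
>>> # zeros cover the first sequence (and its special tokens), ones cover the second
>>> print(encoding["token_type_ids"])
```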
@@ -120,7 +120,7 @@ class FSMTConfig(PretrainedConfig):
```python
>>> from transformers import FSMTConfig, FSMTModel
>>> config = FSMTConfig.from_pretrained("facebook/wmt19-en-ru")
>>> model = FSMTModel(config)
```"""
model_type = "fsmt"
@@ -1114,10 +1114,10 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
>>> from transformers import FunnelTokenizer, FunnelForPreTraining
>>> import torch
>>> tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
>>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> logits = model(**inputs).logits
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1273,10 +1273,10 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel):
>>> from transformers import FunnelTokenizer, TFFunnelForPreTraining
>>> import tensorflow as tf
>>> tokenizer = FunnelTokenizer.from_pretrained("funnel-transformer/small")
>>> model = TFFunnelForPreTraining.from_pretrained("funnel-transformer/small")
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> logits = model(inputs).logits
```"""
inputs = input_processing(
@@ -631,12 +631,13 @@ PARALLELIZE_DOCSTRING = r"""
```python
# Here is an example of a device map on a machine with 4 GPUs using gpt2-xl, which has a total of 48 attention modules:
model = GPT2LMHeadModel.from_pretrained("gpt2-xl")
device_map = {
    0: [0, 1, 2, 3, 4, 5, 6, 7, 8],
    1: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
    2: [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
    3: [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47],
}
model.parallelize(device_map)
```
"""
@@ -647,14 +648,15 @@ DEPARALLELIZE_DOCSTRING = r"""
```python
# On a 4 GPU machine with gpt2-large:
model = GPT2LMHeadModel.from_pretrained("gpt2-large")
device_map = {
    0: [0, 1, 2, 3, 4, 5, 6, 7],
    1: [8, 9, 10, 11, 12, 13, 14, 15],
    2: [16, 17, 18, 19, 20, 21, 22, 23],
    3: [24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
}
model.parallelize(device_map)  # Splits the model across several devices
model.deparallelize()  # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
```
"""
@@ -1224,13 +1226,15 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
>>> import torch
>>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
>>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
>>> model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
>>> # Add a [CLS] to the vocabulary (we should train it also!)
>>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
>>> embedding_layer = model.resize_token_embeddings(
...     len(tokenizer)
... )  # Update the model embeddings with the new vocabulary size
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
@@ -1033,13 +1033,15 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
>>> import tensorflow as tf
>>> from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel
>>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
>>> model = TFGPT2DoubleHeadsModel.from_pretrained("gpt2")
>>> # Add a [CLS] to the vocabulary (we should train it also!)
>>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
>>> embedding_layer = model.resize_token_embeddings(
...     len(tokenizer)
... )  # Update the model embeddings with the new vocabulary size
>>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
>>> encoded_choices = [tokenizer.encode(s) for s in choices]
@@ -412,11 +412,13 @@ PARALLELIZE_DOCSTRING = r"""
```python
# Here is an example of a device map on a machine with 4 GPUs using gpt-j-6B, which has a total of 28 attention modules:
model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
device_map = {
    0: [0, 1, 2, 3, 4, 5, 6],
    1: [7, 8, 9, 10, 11, 12, 13],
    2: [14, 15, 16, 17, 18, 19, 20],
    3: [21, 22, 23, 24, 25, 26, 27],
}
model.parallelize(device_map)
```
"""
@@ -428,13 +430,15 @@ DEPARALLELIZE_DOCSTRING = r"""
```python
# On a 4 GPU machine with gpt-j-6B:
model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
device_map = {
    0: [0, 1, 2, 3, 4, 5, 6],
    1: [7, 8, 9, 10, 11, 12, 13],
    2: [14, 15, 16, 17, 18, 19, 20],
    3: [21, 22, 23, 24, 25, 26, 27],
}
model.parallelize(device_map)  # Splits the model across several devices
model.deparallelize()  # Put the model back on cpu and cleans memory by calling torch.cuda.empty_cache()
```
"""