Unverified commit b5e2b183, authored by Sylvain Gugger, committed by GitHub
Browse files

Doc styler examples (#14953)

* Fix bad examples

* Add black formatting to style_doc

* Use first nonempty line

* Put it at the right place

* Don't add spaces to empty lines

* Better templates

* Deal with triple quotes in docstrings

* Result of style_doc

* Enable mdx treatment and fix code examples in MDXs

* Result of doc styler on doc source files

* Last fixes

* Break copy from
parent e13f72fb
...@@ -987,11 +987,13 @@ class HubertModel(HubertPreTrainedModel): ...@@ -987,11 +987,13 @@ class HubertModel(HubertPreTrainedModel):
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft") >>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft") >>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")
>>> def map_to_array(batch): >>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"]) ... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech ... batch["speech"] = speech
... return batch ... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array) >>> ds = ds.map(map_to_array)
......
...@@ -1417,11 +1417,13 @@ class TFHubertModel(TFHubertPreTrainedModel): ...@@ -1417,11 +1417,13 @@ class TFHubertModel(TFHubertPreTrainedModel):
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h") >>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h")
>>> model = TFHubertModel.from_pretrained("facebook/hubert-base-960h") >>> model = TFHubertModel.from_pretrained("facebook/hubert-base-960h")
>>> def map_to_array(batch): >>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"]) ... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech ... batch["speech"] = speech
... return batch ... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array) >>> ds = ds.map(map_to_array)
...@@ -1528,16 +1530,19 @@ class TFHubertForCTC(TFHubertPreTrainedModel): ...@@ -1528,16 +1530,19 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
>>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h") >>> processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-base-960h")
>>> model = TFHubertForCTC.from_pretrained("facebook/hubert-base-960h") >>> model = TFHubertForCTC.from_pretrained("facebook/hubert-base-960h")
>>> def map_to_array(batch): >>> def map_to_array(batch):
... speech, _ = sf.read(batch["file"]) ... speech, _ = sf.read(batch["file"])
... batch["speech"] = speech ... batch["speech"] = speech
... return batch ... return batch
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array) >>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1 >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> logits = model(input_values).logits >>> predicted_ids = tf.argmax(logits, axis=-1) >>> logits = model(input_values).logits
>>> predicted_ids = tf.argmax(logits, axis=-1)
>>> transcription = processor.decode(predicted_ids[0]) >>> transcription = processor.decode(predicted_ids[0])
......
...@@ -686,11 +686,11 @@ class ImageGPTModel(ImageGPTPreTrainedModel): ...@@ -686,11 +686,11 @@ class ImageGPTModel(ImageGPTPreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg' >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained('openai/imagegpt-small') >>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTModel.from_pretrained('openai/imagegpt-small') >>> model = ImageGPTModel.from_pretrained("openai/imagegpt-small")
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
...@@ -981,27 +981,31 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel): ...@@ -981,27 +981,31 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
>>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt
>>> import numpy as np >>> import numpy as np
>>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained('openai/imagegpt-small') >>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForCausalImageModeling.from_pretrained('openai/imagegpt-small') >>> model = ImageGPTForCausalImageModeling.from_pretrained("openai/imagegpt-small")
>>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
>>> model.to(device) >>> model.to(device)
>>> # unconditional generation of 8 images >>> # unconditional generation of 8 images
>>> batch_size = 8 >>> batch_size = 8
>>> context = torch.full((batch_size, 1), model.config.vocab_size - 1) #initialize with SOS token >>> context = torch.full((batch_size, 1), model.config.vocab_size - 1) # initialize with SOS token
>>> context = torch.tensor(context).to(device) >>> context = torch.tensor(context).to(device)
>>> output = model.generate(input_ids=context, max_length=model.config.n_positions + 1, temperature=1.0, do_sample=True, top_k=40) >>> output = model.generate(
... input_ids=context, max_length=model.config.n_positions + 1, temperature=1.0, do_sample=True, top_k=40
... )
>>> clusters = feature_extractor.clusters >>> clusters = feature_extractor.clusters
>>> n_px = feature_extractor.size >>> n_px = feature_extractor.size
>>> samples = output[:,1:].cpu().detach().numpy() >>> samples = output[:, 1:].cpu().detach().numpy()
>>> samples_img = [np.reshape(np.rint(127.5 * (clusters[s] + 1.0)), [n_px, n_px, 3]).astype(np.uint8) for s in samples] # convert color cluster tokens back to pixels >>> samples_img = [
... np.reshape(np.rint(127.5 * (clusters[s] + 1.0)), [n_px, n_px, 3]).astype(np.uint8) for s in samples
... ] # convert color cluster tokens back to pixels
>>> f, axes = plt.subplots(1, batch_size, dpi=300) >>> f, axes = plt.subplots(1, batch_size, dpi=300)
>>> for img, ax in zip(samples_img, axes): >>> for img, ax in zip(samples_img, axes):
... ax.axis('off') ... ax.axis("off")
... ax.imshow(img) ... ax.imshow(img)
```""" ```"""
if "pixel_values" in kwargs: if "pixel_values" in kwargs:
...@@ -1126,11 +1130,11 @@ class ImageGPTForImageClassification(ImageGPTPreTrainedModel): ...@@ -1126,11 +1130,11 @@ class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg' >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained('openai/imagegpt-small') >>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForImageClassification.from_pretrained('openai/imagegpt-small') >>> model = ImageGPTForImageClassification.from_pretrained("openai/imagegpt-small")
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
......
...@@ -755,8 +755,8 @@ class LayoutLMModel(LayoutLMPreTrainedModel): ...@@ -755,8 +755,8 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
>>> from transformers import LayoutLMTokenizer, LayoutLMModel >>> from transformers import LayoutLMTokenizer, LayoutLMModel
>>> import torch >>> import torch
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -768,13 +768,15 @@ class LayoutLMModel(LayoutLMPreTrainedModel): ...@@ -768,13 +768,15 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="pt") >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes]) >>> bbox = torch.tensor([token_boxes])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids) >>> outputs = model(
... input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
... )
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
```""" ```"""
...@@ -900,8 +902,8 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel): ...@@ -900,8 +902,8 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
>>> from transformers import LayoutLMTokenizer, LayoutLMForMaskedLM >>> from transformers import LayoutLMTokenizer, LayoutLMForMaskedLM
>>> import torch >>> import torch
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForMaskedLM.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "[MASK]"] >>> words = ["Hello", "[MASK]"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -913,7 +915,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel): ...@@ -913,7 +915,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="pt") >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
...@@ -921,8 +923,13 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel): ...@@ -921,8 +923,13 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
>>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"] >>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=labels) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=labels,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
```""" ```"""
...@@ -1017,8 +1024,8 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel): ...@@ -1017,8 +1024,8 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel):
>>> from transformers import LayoutLMTokenizer, LayoutLMForSequenceClassification >>> from transformers import LayoutLMTokenizer, LayoutLMForSequenceClassification
>>> import torch >>> import torch
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForSequenceClassification.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -1030,15 +1037,20 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel): ...@@ -1030,15 +1037,20 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel):
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="pt") >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes]) >>> bbox = torch.tensor([token_boxes])
>>> sequence_label = torch.tensor([1]) >>> sequence_label = torch.tensor([1])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=sequence_label) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=sequence_label,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> logits = outputs.logits
...@@ -1147,8 +1159,8 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel): ...@@ -1147,8 +1159,8 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
>>> from transformers import LayoutLMTokenizer, LayoutLMForTokenClassification >>> from transformers import LayoutLMTokenizer, LayoutLMForTokenClassification
>>> import torch >>> import torch
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = LayoutLMForTokenClassification.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -1160,15 +1172,20 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel): ...@@ -1160,15 +1172,20 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="pt") >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = torch.tensor([token_boxes]) >>> bbox = torch.tensor([token_boxes])
>>> token_labels = torch.tensor([1,1,0,0]).unsqueeze(0) # batch size of 1 >>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0) # batch size of 1
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=token_labels) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=token_labels,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> logits = outputs.logits
......
...@@ -954,8 +954,8 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel): ...@@ -954,8 +954,8 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
>>> from transformers import LayoutLMTokenizer, TFLayoutLMModel >>> from transformers import LayoutLMTokenizer, TFLayoutLMModel
>>> import tensorflow as tf >>> import tensorflow as tf
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = TFLayoutLMModel.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = TFLayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -967,13 +967,15 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel): ...@@ -967,13 +967,15 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel):
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="tf") >>> encoding = tokenizer(" ".join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = tf.convert_to_tensor([token_boxes]) >>> bbox = tf.convert_to_tensor([token_boxes])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids) >>> outputs = model(
... input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
... )
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
```""" ```"""
...@@ -1094,8 +1096,8 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL ...@@ -1094,8 +1096,8 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
>>> from transformers import LayoutLMTokenizer, TFLayoutLMForMaskedLM >>> from transformers import LayoutLMTokenizer, TFLayoutLMForMaskedLM
>>> import tensorflow as tf >>> import tensorflow as tf
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = TFLayoutLMForMaskedLM.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = TFLayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "[MASK]"] >>> words = ["Hello", "[MASK]"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -1107,7 +1109,7 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL ...@@ -1107,7 +1109,7 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="tf") >>> encoding = tokenizer(" ".join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
...@@ -1115,8 +1117,13 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL ...@@ -1115,8 +1117,13 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL
>>> labels = tokenizer("Hello world", return_tensors="tf")["input_ids"] >>> labels = tokenizer("Hello world", return_tensors="tf")["input_ids"]
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=labels) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=labels,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
```""" ```"""
...@@ -1231,8 +1238,8 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC ...@@ -1231,8 +1238,8 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
>>> from transformers import LayoutLMTokenizer, TFLayoutLMForSequenceClassification >>> from transformers import LayoutLMTokenizer, TFLayoutLMForSequenceClassification
>>> import tensorflow as tf >>> import tensorflow as tf
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = TFLayoutLMForSequenceClassification.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = TFLayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -1244,15 +1251,20 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC ...@@ -1244,15 +1251,20 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="tf") >>> encoding = tokenizer(" ".join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = tf.convert_to_tensor([token_boxes]) >>> bbox = tf.convert_to_tensor([token_boxes])
>>> sequence_label = tf.convert_to_tensor([1]) >>> sequence_label = tf.convert_to_tensor([1])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=sequence_label) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=sequence_label,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> logits = outputs.logits
...@@ -1371,8 +1383,8 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif ...@@ -1371,8 +1383,8 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
>>> from transformers import LayoutLMTokenizer, TFLayoutLMForTokenClassification >>> from transformers import LayoutLMTokenizer, TFLayoutLMForTokenClassification
>>> import tensorflow as tf >>> import tensorflow as tf
>>> tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased') >>> tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
>>> model = TFLayoutLMForTokenClassification.from_pretrained('microsoft/layoutlm-base-uncased') >>> model = TFLayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")
>>> words = ["Hello", "world"] >>> words = ["Hello", "world"]
>>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782] >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]
...@@ -1384,15 +1396,20 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif ...@@ -1384,15 +1396,20 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif
>>> # add bounding boxes of cls + sep tokens >>> # add bounding boxes of cls + sep tokens
>>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]] >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
>>> encoding = tokenizer(' '.join(words), return_tensors="tf") >>> encoding = tokenizer(" ".join(words), return_tensors="tf")
>>> input_ids = encoding["input_ids"] >>> input_ids = encoding["input_ids"]
>>> attention_mask = encoding["attention_mask"] >>> attention_mask = encoding["attention_mask"]
>>> token_type_ids = encoding["token_type_ids"] >>> token_type_ids = encoding["token_type_ids"]
>>> bbox = tf.convert_to_tensor([token_boxes]) >>> bbox = tf.convert_to_tensor([token_boxes])
>>> token_labels = tf.convert_to_tensor([1,1,0,0]) >>> token_labels = tf.convert_to_tensor([1, 1, 0, 0])
>>> outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids, >>> outputs = model(
... labels=token_labels) ... input_ids=input_ids,
... bbox=bbox,
... attention_mask=attention_mask,
... token_type_ids=token_type_ids,
... labels=token_labels,
... )
>>> loss = outputs.loss >>> loss = outputs.loss
>>> logits = outputs.logits >>> logits = outputs.logits
......
...@@ -827,8 +827,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel): ...@@ -827,8 +827,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
>>> from transformers import LayoutLMv2Processor, LayoutLMv2Model >>> from transformers import LayoutLMv2Processor, LayoutLMv2Model
>>> from PIL import Image >>> from PIL import Image
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2Model.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") >>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
...@@ -995,8 +995,8 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel): ...@@ -995,8 +995,8 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> import torch >>> import torch
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2ForSequenceClassification.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> model = LayoutLMv2ForSequenceClassification.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") >>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
...@@ -1161,12 +1161,12 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel): ...@@ -1161,12 +1161,12 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
>>> from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification >>> from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
>>> from PIL import Image >>> from PIL import Image
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased', revision="no_ocr") >>> processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
>>> model = LayoutLMv2ForTokenClassification.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> model = LayoutLMv2ForTokenClassification.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") >>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> words = ["hello", "world"] >>> words = ["hello", "world"]
>>> boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes >>> boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
>>> word_labels = [0, 1] >>> word_labels = [0, 1]
>>> encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="pt") >>> encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="pt")
...@@ -1285,8 +1285,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel): ...@@ -1285,8 +1285,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> from PIL import Image >>> from PIL import Image
>>> import torch >>> import torch
>>> processor = LayoutLMv2Processor.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2ForQuestionAnswering.from_pretrained('microsoft/layoutlmv2-base-uncased') >>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") >>> image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
>>> question = "what's his name?" >>> question = "what's his name?"
......
...@@ -343,10 +343,24 @@ class LEDEncoderSelfAttention(nn.Module): ...@@ -343,10 +343,24 @@ class LEDEncoderSelfAttention(nn.Module):
Example: Example:
```python ```python
chunked_hidden_states: [ 0.4983, 2.6918, -0.0071, 1.0492, chunked_hidden_states: [
-1.8348, 0.7672, 0.2986, 0.0285, 0.4983,
-0.7584, 0.4206, -0.0405, 0.1599, 2.6918,
2.0514, -1.1600, 0.5372, 0.2629 ] -0.0071,
1.0492,
-1.8348,
0.7672,
0.2986,
0.0285,
-0.7584,
0.4206,
-0.0405,
0.1599,
2.0514,
-1.1600,
0.5372,
0.2629,
]
window_overlap = num_rows = 4 window_overlap = num_rows = 4
``` ```
...@@ -2334,11 +2348,12 @@ class LEDForConditionalGeneration(LEDPreTrainedModel): ...@@ -2334,11 +2348,12 @@ class LEDForConditionalGeneration(LEDPreTrainedModel):
```python ```python
>>> from transformers import LEDTokenizer, LEDForConditionalGeneration >>> from transformers import LEDTokenizer, LEDForConditionalGeneration
>>> tokenizer = LEDTokenizer.from_pretrained('allenai/led-base-16384')
>>> tokenizer = LEDTokenizer.from_pretrained("allenai/led-base-16384")
>>> TXT = "My friends are <mask> but they eat too many carbs." >>> TXT = "My friends are <mask> but they eat too many carbs."
>>> model = LEDForConditionalGeneration.from_pretrained('allenai/led-base-16384') >>> model = LEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")
>>> input_ids = tokenizer([TXT], return_tensors='pt')['input_ids'] >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
>>> prediction = model.generate(input_ids)[0] >>> prediction = model.generate(input_ids)[0]
>>> print(tokenizer.decode(prediction, skip_special_tokens=True)) >>> print(tokenizer.decode(prediction, skip_special_tokens=True))
......
...@@ -610,10 +610,24 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): ...@@ -610,10 +610,24 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer):
Example: Example:
```python ```python
chunked_hidden_states: [ 0.4983, 2.6918, -0.0071, 1.0492, chunked_hidden_states: [
-1.8348, 0.7672, 0.2986, 0.0285, 0.4983,
-0.7584, 0.4206, -0.0405, 0.1599, 2.6918,
2.0514, -1.1600, 0.5372, 0.2629 ] -0.0071,
1.0492,
-1.8348,
0.7672,
0.2986,
0.0285,
-0.7584,
0.4206,
-0.0405,
0.1599,
2.0514,
-1.1600,
0.5372,
0.2629,
]
window_overlap = num_rows = 4 window_overlap = num_rows = 4
``` ```
...@@ -2382,11 +2396,12 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel): ...@@ -2382,11 +2396,12 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel):
```python ```python
>>> from transformers import LEDTokenizer, TFLEDForConditionalGeneration >>> from transformers import LEDTokenizer, TFLEDForConditionalGeneration
>>> import tensorflow as tf >>> import tensorflow as tf
>>> mname = 'allenai/led-base-16384'
>>> mname = "allenai/led-base-16384"
>>> tokenizer = LEDTokenizer.from_pretrained(mname) >>> tokenizer = LEDTokenizer.from_pretrained(mname)
>>> TXT = "My friends are <mask> but they eat too many carbs." >>> TXT = "My friends are <mask> but they eat too many carbs."
>>> model = TFLEDForConditionalGeneration.from_pretrained(mname) >>> model = TFLEDForConditionalGeneration.from_pretrained(mname)
>>> batch = tokenizer([TXT], return_tensors='tf') >>> batch = tokenizer([TXT], return_tensors="tf")
>>> logits = model(inputs=batch.input_ids).logits >>> logits = model(inputs=batch.input_ids).logits
>>> probs = tf.nn.softmax(logits[0]) >>> probs = tf.nn.softmax(logits[0])
>>> # probs[5] is associated with the mask token >>> # probs[5] is associated with the mask token
......
...@@ -726,10 +726,24 @@ class LongformerSelfAttention(nn.Module): ...@@ -726,10 +726,24 @@ class LongformerSelfAttention(nn.Module):
Example: Example:
```python ```python
chunked_hidden_states: [ 0.4983, 2.6918, -0.0071, 1.0492, chunked_hidden_states: [
-1.8348, 0.7672, 0.2986, 0.0285, 0.4983,
-0.7584, 0.4206, -0.0405, 0.1599, 2.6918,
2.0514, -1.1600, 0.5372, 0.2629 ] -0.0071,
1.0492,
-1.8348,
0.7672,
0.2986,
0.0285,
-0.7584,
0.4206,
-0.0405,
0.1599,
2.0514,
-1.1600,
0.5372,
0.2629,
]
window_overlap = num_rows = 4 window_overlap = num_rows = 4
``` ```
...@@ -1605,19 +1619,30 @@ class LongformerModel(LongformerPreTrainedModel): ...@@ -1605,19 +1619,30 @@ class LongformerModel(LongformerPreTrainedModel):
>>> import torch >>> import torch
>>> from transformers import LongformerModel, LongformerTokenizer >>> from transformers import LongformerModel, LongformerTokenizer
>>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096') >>> model = LongformerModel.from_pretrained("allenai/longformer-base-4096")
>>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096') >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
>>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000) # long input document
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1 >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
>>> attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention >>> attention_mask = torch.ones(
>>> global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to global attention to be deactivated for all tokens ... input_ids.shape, dtype=torch.long, device=input_ids.device
>>> global_attention_mask[:, [1, 4, 21,]] = 1 # Set global attention to random tokens for the sake of this example >>> ) # initialize to local attention
... # Usually, set global attention based on the task. For example, >>> global_attention_mask = torch.zeros(
... # classification: the <s> token ... input_ids.shape, dtype=torch.long, device=input_ids.device
... # QA: question tokens >>> ) # initialize to global attention to be deactivated for all tokens
... # LM: potentially on the beginning of sentences and paragraphs >>> global_attention_mask[
... :,
... [
... 1,
... 4,
... 21,
... ],
>>> ] = 1 # Set global attention to random tokens for the sake of this example
>>> # Usually, set global attention based on the task. For example,
>>> # classification: the <s> token
>>> # QA: question tokens
>>> # LM: potentially on the beginning of sentences and paragraphs
>>> outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask) >>> outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask)
>>> sequence_output = outputs.last_hidden_state >>> sequence_output = outputs.last_hidden_state
>>> pooled_output = outputs.pooler_output >>> pooled_output = outputs.pooler_output
...@@ -1748,14 +1773,14 @@ class LongformerForMaskedLM(LongformerPreTrainedModel): ...@@ -1748,14 +1773,14 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
>>> import torch >>> import torch
>>> from transformers import LongformerForMaskedLM, LongformerTokenizer >>> from transformers import LongformerForMaskedLM, LongformerTokenizer
>>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096') >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
>>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096') >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
>>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000) # long input document
>>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1 >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1
>>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM >>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM
... # check `LongformerModel.forward` for more details how to set *attention_mask* >>> # check `LongformerModel.forward` for more details how to set *attention_mask*
>>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids) >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
>>> loss = outputs.loss >>> loss = outputs.loss
>>> prediction_logits = outputs.logits >>> prediction_logits = outputs.logits
...@@ -1994,8 +2019,10 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel): ...@@ -1994,8 +2019,10 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel):
>>> end_logits = outputs.end_logits >>> end_logits = outputs.end_logits
>>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist()) >>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
>>> answer_tokens = all_tokens[torch.argmax(start_logits) :torch.argmax(end_logits)+1] >>> answer_tokens = all_tokens[torch.argmax(start_logits) : torch.argmax(end_logits) + 1]
>>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token >>> answer = tokenizer.decode(
... tokenizer.convert_tokens_to_ids(answer_tokens)
>>> ) # remove space prepending space token
```""" ```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
......
...@@ -1138,10 +1138,24 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): ...@@ -1138,10 +1138,24 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
Example: Example:
```python ```python
chunked_hidden_states: [ 0.4983, 2.6918, -0.0071, 1.0492, chunked_hidden_states: [
-1.8348, 0.7672, 0.2986, 0.0285, 0.4983,
-0.7584, 0.4206, -0.0405, 0.1599, 2.6918,
2.0514, -1.1600, 0.5372, 0.2629 ] -0.0071,
1.0492,
-1.8348,
0.7672,
0.2986,
0.0285,
-0.7584,
0.4206,
-0.0405,
0.1599,
2.0514,
-1.1600,
0.5372,
0.2629,
]
window_overlap = num_rows = 4 window_overlap = num_rows = 4
``` ```
......
...@@ -937,8 +937,8 @@ class LukeModel(LukePreTrainedModel): ...@@ -937,8 +937,8 @@ class LukeModel(LukePreTrainedModel):
>>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base") >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base")
>>> model = LukeModel.from_pretrained("studio-ousia/luke-base") >>> model = LukeModel.from_pretrained("studio-ousia/luke-base")
# Compute the contextualized entity representation corresponding to the entity mention "Beyoncé" # Compute the contextualized entity representation corresponding to the entity mention "Beyoncé"
>>> text = "Beyoncé lives in Los Angeles." >>> text = "Beyoncé lives in Los Angeles."
>>> entity_spans = [(0, 7)] # character-based entity span corresponding to "Beyoncé" >>> entity_spans = [(0, 7)] # character-based entity span corresponding to "Beyoncé"
...@@ -946,13 +946,21 @@ class LukeModel(LukePreTrainedModel): ...@@ -946,13 +946,21 @@ class LukeModel(LukePreTrainedModel):
>>> outputs = model(**encoding) >>> outputs = model(**encoding)
>>> word_last_hidden_state = outputs.last_hidden_state >>> word_last_hidden_state = outputs.last_hidden_state
>>> entity_last_hidden_state = outputs.entity_last_hidden_state >>> entity_last_hidden_state = outputs.entity_last_hidden_state
# Input Wikipedia entities to obtain enriched contextualized representations of word tokens # Input Wikipedia entities to obtain enriched contextualized representations of word tokens
>>> text = "Beyoncé lives in Los Angeles."
>>> entities = ["Beyoncé", "Los Angeles"] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
>>> entity_spans = [(0, 7), (17, 28)] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
>>> encoding = tokenizer(text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt") >>> text = "Beyoncé lives in Los Angeles."
>>> entities = [
... "Beyoncé",
... "Los Angeles",
>>> ] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "Los Angeles"
>>> entity_spans = [
... (0, 7),
... (17, 28),
>>> ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
>>> encoding = tokenizer(
... text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt"
... )
>>> outputs = model(**encoding) >>> outputs = model(**encoding)
>>> word_last_hidden_state = outputs.last_hidden_state >>> word_last_hidden_state = outputs.last_hidden_state
>>> entity_last_hidden_state = outputs.entity_last_hidden_state >>> entity_last_hidden_state = outputs.entity_last_hidden_state
...@@ -1423,7 +1431,10 @@ class LukeForEntityPairClassification(LukePreTrainedModel): ...@@ -1423,7 +1431,10 @@ class LukeForEntityPairClassification(LukePreTrainedModel):
>>> model = LukeForEntityPairClassification.from_pretrained("studio-ousia/luke-large-finetuned-tacred") >>> model = LukeForEntityPairClassification.from_pretrained("studio-ousia/luke-large-finetuned-tacred")
>>> text = "Beyoncé lives in Los Angeles." >>> text = "Beyoncé lives in Los Angeles."
>>> entity_spans = [(0, 7), (17, 28)] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles" >>> entity_spans = [
... (0, 7),
... (17, 28),
>>> ] # character-based entity spans corresponding to "Beyoncé" and "Los Angeles"
>>> inputs = tokenizer(text, entity_spans=entity_spans, return_tensors="pt") >>> inputs = tokenizer(text, entity_spans=entity_spans, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
...@@ -1548,8 +1559,8 @@ class LukeForEntitySpanClassification(LukePreTrainedModel): ...@@ -1548,8 +1559,8 @@ class LukeForEntitySpanClassification(LukePreTrainedModel):
>>> model = LukeForEntitySpanClassification.from_pretrained("studio-ousia/luke-large-finetuned-conll-2003") >>> model = LukeForEntitySpanClassification.from_pretrained("studio-ousia/luke-large-finetuned-conll-2003")
>>> text = "Beyoncé lives in Los Angeles" >>> text = "Beyoncé lives in Los Angeles"
# List all possible entity spans in the text # List all possible entity spans in the text
>>> word_start_positions = [0, 8, 14, 17, 21] # character-based start positions of word tokens >>> word_start_positions = [0, 8, 14, 17, 21] # character-based start positions of word tokens
>>> word_end_positions = [7, 13, 16, 20, 28] # character-based end positions of word tokens >>> word_end_positions = [7, 13, 16, 20, 28] # character-based end positions of word tokens
>>> entity_spans = [] >>> entity_spans = []
...@@ -1563,7 +1574,7 @@ class LukeForEntitySpanClassification(LukePreTrainedModel): ...@@ -1563,7 +1574,7 @@ class LukeForEntitySpanClassification(LukePreTrainedModel):
>>> predicted_class_indices = logits.argmax(-1).squeeze().tolist() >>> predicted_class_indices = logits.argmax(-1).squeeze().tolist()
>>> for span, predicted_class_idx in zip(entity_spans, predicted_class_indices): >>> for span, predicted_class_idx in zip(entity_spans, predicted_class_indices):
... if predicted_class_idx != 0: ... if predicted_class_idx != 0:
... print(text[span[0]:span[1]], model.config.id2label[predicted_class_idx]) ... print(text[span[0] : span[1]], model.config.id2label[predicted_class_idx])
Beyoncé PER Beyoncé PER
Los Angeles LOC Los Angeles LOC
```""" ```"""
......
...@@ -1283,14 +1283,14 @@ class M2M100ForConditionalGeneration(M2M100PreTrainedModel): ...@@ -1283,14 +1283,14 @@ class M2M100ForConditionalGeneration(M2M100PreTrainedModel):
```python ```python
>>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration >>> from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
>>> model = M2M100ForConditionalGeneration.from_pretrained('facebook/m2m100_418M') >>> model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
>>> tokenizer = M2M100Tokenizer.from_pretrained('facebook/m2m100_418M') >>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
>>> text_to_translate = "Life is like a box of chocolates" >>> text_to_translate = "Life is like a box of chocolates"
>>> model_inputs = tokenizer(text_to_translate, return_tensors='pt') >>> model_inputs = tokenizer(text_to_translate, return_tensors="pt")
>>> # translate to French >>> # translate to French
>>> gen_tokens = model.generate( **model_inputs, forced_bos_token_id=tokenizer.get_lang_id("fr")) >>> gen_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.get_lang_id("fr"))
>>> print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)) >>> print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
```""" ```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
......
...@@ -111,13 +111,14 @@ class M2M100Tokenizer(PreTrainedTokenizer): ...@@ -111,13 +111,14 @@ class M2M100Tokenizer(PreTrainedTokenizer):
```python ```python
>>> from transformers import M2M100Tokenizer >>> from transformers import M2M100Tokenizer
>>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M, src_lang="en", tgt_lang="ro")
>>> tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="en", tgt_lang="ro")
>>> src_text = " UN Chief Says There Is No Military Solution in Syria" >>> src_text = " UN Chief Says There Is No Military Solution in Syria"
>>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria" >>> tgt_text = "Şeful ONU declară că nu există o soluţie militară în Siria"
>>> model_inputs = tokenizer(src_text, return_tensors="pt") >>> model_inputs = tokenizer(src_text, return_tensors="pt")
>>> with tokenizer.as_target_tokenizer(): >>> with tokenizer.as_target_tokenizer():
... labels = tokenizer(tgt_text, return_tensors="pt").input_ids ... labels = tokenizer(tgt_text, return_tensors="pt").input_ids
>>> # model(**model_inputs, labels=labels) should work >>> model(**model_inputs, labels=labels) # should work
```""" ```"""
vocab_files_names = VOCAB_FILES_NAMES vocab_files_names = VOCAB_FILES_NAMES
......
...@@ -986,11 +986,11 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): ...@@ -986,11 +986,11 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel):
```python ```python
>>> from transformers import MarianTokenizer, FlaxMarianMTModel >>> from transformers import MarianTokenizer, FlaxMarianMTModel
>>> tokenizer = MarianTokenizer.from_pretrained('facebook/marian-large-cnn') >>> tokenizer = MarianTokenizer.from_pretrained("facebook/marian-large-cnn")
>>> model = FlaxMarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> model = FlaxMarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=64, return_tensors='jax') >>> inputs = tokenizer(text, max_length=64, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
```""" ```"""
...@@ -1053,11 +1053,11 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): ...@@ -1053,11 +1053,11 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel):
```python ```python
>>> from transformers import MarianTokenizer, FlaxMarianMTModel >>> from transformers import MarianTokenizer, FlaxMarianMTModel
>>> tokenizer = MarianTokenizer.from_pretrained('facebook/marian-large-cnn') >>> tokenizer = MarianTokenizer.from_pretrained("facebook/marian-large-cnn")
>>> model = FlaxMarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> model = FlaxMarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=64, return_tensors='jax') >>> inputs = tokenizer(text, max_length=64, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id >>> decoder_start_token_id = model.config.decoder_start_token_id
...@@ -1321,11 +1321,11 @@ class FlaxMarianMTModel(FlaxMarianPreTrainedModel): ...@@ -1321,11 +1321,11 @@ class FlaxMarianMTModel(FlaxMarianPreTrainedModel):
```python ```python
>>> from transformers import MarianTokenizer, FlaxMarianMTModel >>> from transformers import MarianTokenizer, FlaxMarianMTModel
>>> model = FlaxMarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> model = FlaxMarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=64, return_tensors='jax') >>> inputs = tokenizer(text, max_length=64, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id >>> decoder_start_token_id = model.config.decoder_start_token_id
...@@ -1482,11 +1482,11 @@ FLAX_MARIAN_MT_DOCSTRING = """ ...@@ -1482,11 +1482,11 @@ FLAX_MARIAN_MT_DOCSTRING = """
```python ```python
>>> from transformers import MarianTokenizer, FlaxMarianMTModel >>> from transformers import MarianTokenizer, FlaxMarianMTModel
>>> model = FlaxMarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> model = FlaxMarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> input_ids = tokenizer(text, max_length=64, return_tensors='jax').input_ids >>> input_ids = tokenizer(text, max_length=64, return_tensors="jax").input_ids
>>> sequences = model.generate(input_ids, max_length=64, num_beams=2).sequences >>> sequences = model.generate(input_ids, max_length=64, num_beams=2).sequences
......
...@@ -531,10 +531,11 @@ MARIAN_GENERATION_EXAMPLE = r""" ...@@ -531,10 +531,11 @@ MARIAN_GENERATION_EXAMPLE = r"""
```python ```python
>>> from transformers import MarianTokenizer, MarianMTModel >>> from transformers import MarianTokenizer, MarianMTModel
>>> from typing import List >>> from typing import List
>>> src = 'fr' # source language
>>> trg = 'en' # target language >>> src = "fr" # source language
>>> trg = "en" # target language
>>> sample_text = "où est l'arrêt de bus ?" >>> sample_text = "où est l'arrêt de bus ?"
>>> model_name = f'Helsinki-NLP/opus-mt-{src}-{trg}' >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
>>> model = MarianMTModel.from_pretrained(model_name) >>> model = MarianMTModel.from_pretrained(model_name)
>>> tokenizer = MarianTokenizer.from_pretrained(model_name) >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
...@@ -1132,12 +1133,17 @@ class MarianModel(MarianPreTrainedModel): ...@@ -1132,12 +1133,17 @@ class MarianModel(MarianPreTrainedModel):
```python ```python
>>> from transformers import MarianTokenizer, MarianModel >>> from transformers import MarianTokenizer, MarianModel
>>> tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> model = MarianModel.from_pretrained('Helsinki-NLP/opus-mt-en-de') >>> model = MarianModel.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1 >>> input_ids = tokenizer(
>>> decoder_input_ids = tokenizer("<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen", ... "Studies have been shown that owning a dog is good for you", return_tensors="pt"
... return_tensors="pt", add_special_tokens=False).input_ids # Batch size 1 >>> ).input_ids # Batch size 1
>>> decoder_input_ids = tokenizer(
... "<pad> Studien haben gezeigt dass es hilfreich ist einen Hund zu besitzen",
... return_tensors="pt",
... add_special_tokens=False,
>>> ).input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state >>> last_hidden_states = outputs.last_hidden_state
...@@ -1513,8 +1519,8 @@ class MarianForCausalLM(MarianPreTrainedModel): ...@@ -1513,8 +1519,8 @@ class MarianForCausalLM(MarianPreTrainedModel):
```python ```python
>>> from transformers import MarianTokenizer, MarianForCausalLM >>> from transformers import MarianTokenizer, MarianForCausalLM
>>> tokenizer = MarianTokenizer.from_pretrained('facebook/bart-large') >>> tokenizer = MarianTokenizer.from_pretrained("facebook/bart-large")
>>> model = MarianForCausalLM.from_pretrained('facebook/bart-large', add_cross_attention=False) >>> model = MarianForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
......
...@@ -562,10 +562,11 @@ MARIAN_GENERATION_EXAMPLE = r""" ...@@ -562,10 +562,11 @@ MARIAN_GENERATION_EXAMPLE = r"""
```python ```python
>>> from transformers import MarianTokenizer, TFMarianMTModel >>> from transformers import MarianTokenizer, TFMarianMTModel
>>> from typing import List >>> from typing import List
>>> src = 'fr' # source language
>>> trg = 'en' # target language >>> src = "fr" # source language
>>> trg = "en" # target language
>>> sample_text = "où est l'arrêt de bus ?" >>> sample_text = "où est l'arrêt de bus ?"
>>> model_name = f'Helsinki-NLP/opus-mt-{src}-{trg}' >>> model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
>>> model = TFMarianMTModel.from_pretrained(model_name) >>> model = TFMarianMTModel.from_pretrained(model_name)
>>> tokenizer = MarianTokenizer.from_pretrained(model_name) >>> tokenizer = MarianTokenizer.from_pretrained(model_name)
......
...@@ -102,15 +102,17 @@ class MarianTokenizer(PreTrainedTokenizer): ...@@ -102,15 +102,17 @@ class MarianTokenizer(PreTrainedTokenizer):
```python ```python
>>> from transformers import MarianTokenizer >>> from transformers import MarianTokenizer
>>> tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
>>> src_texts = [ "I am a small frog.", "Tom asked his teacher for advice."] >>> tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
>>> src_texts = ["I am a small frog.", "Tom asked his teacher for advice."]
>>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional >>> tgt_texts = ["Ich bin ein kleiner Frosch.", "Tom bat seinen Lehrer um Rat."] # optional
>>> inputs = tokenizer(src_texts, return_tensors="pt", padding=True) >>> inputs = tokenizer(src_texts, return_tensors="pt", padding=True)
>>> with tokenizer.as_target_tokenizer(): >>> with tokenizer.as_target_tokenizer():
... labels = tokenizer(tgt_texts, return_tensors="pt", padding=True) ... labels = tokenizer(tgt_texts, return_tensors="pt", padding=True)
>>> inputs["labels"] = labels["input_ids"] >>> inputs["labels"] = labels["input_ids"]
# keys [input_ids, attention_mask, labels]. # keys [input_ids, attention_mask, labels].
>>> outputs = model(**inputs) should work
>>> outputs = model(**inputs) # should work
```""" ```"""
vocab_files_names = VOCAB_FILES_NAMES vocab_files_names = VOCAB_FILES_NAMES
......
...@@ -1046,11 +1046,11 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): ...@@ -1046,11 +1046,11 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel):
```python ```python
>>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration
>>> model = FlaxMBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> model = FlaxMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=1024, return_tensors='jax') >>> inputs = tokenizer(text, max_length=1024, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
```""" ```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
...@@ -1112,11 +1112,11 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): ...@@ -1112,11 +1112,11 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel):
```python ```python
>>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration
>>> model = FlaxMBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> model = FlaxMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=1024, return_tensors='jax') >>> inputs = tokenizer(text, max_length=1024, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id >>> decoder_start_token_id = model.config.decoder_start_token_id
...@@ -1379,11 +1379,11 @@ class FlaxMBartForConditionalGeneration(FlaxMBartPreTrainedModel): ...@@ -1379,11 +1379,11 @@ class FlaxMBartForConditionalGeneration(FlaxMBartPreTrainedModel):
```python ```python
>>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration >>> from transformers import MBartTokenizer, FlaxMBartForConditionalGeneration
>>> model = FlaxMBartForConditionalGeneration.from_pretrained('facebook/mbart-large-cc25') >>> model = FlaxMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-cc25') >>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, max_length=1024, return_tensors='jax') >>> inputs = tokenizer(text, max_length=1024, return_tensors="jax")
>>> encoder_outputs = model.encode(**inputs) >>> encoder_outputs = model.encode(**inputs)
>>> decoder_start_token_id = model.config.decoder_start_token_id >>> decoder_start_token_id = model.config.decoder_start_token_id
......
...@@ -1776,8 +1776,8 @@ class MBartForCausalLM(MBartPreTrainedModel): ...@@ -1776,8 +1776,8 @@ class MBartForCausalLM(MBartPreTrainedModel):
```python ```python
>>> from transformers import MBartTokenizer, MBartForCausalLM >>> from transformers import MBartTokenizer, MBartForCausalLM
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/bart-large') >>> tokenizer = MBartTokenizer.from_pretrained("facebook/bart-large")
>>> model = MBartForCausalLM.from_pretrained('facebook/bart-large', add_cross_attention=False) >>> model = MBartForCausalLM.from_pretrained("facebook/bart-large", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
......
...@@ -81,10 +81,11 @@ class MBartTokenizer(XLMRobertaTokenizer): ...@@ -81,10 +81,11 @@ class MBartTokenizer(XLMRobertaTokenizer):
```python ```python
>>> from transformers import MBartTokenizer >>> from transformers import MBartTokenizer
>>> tokenizer = MBartTokenizer.from_pretrained('facebook/mbart-large-en-ro', src_lang="en_XX", tgt_lang="ro_RO")
>>> tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-en-ro", src_lang="en_XX", tgt_lang="ro_RO")
>>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria" >>> example_english_phrase = " UN Chief Says There Is No Military Solution in Syria"
>>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria" >>> expected_translation_romanian = "Şeful ONU declară că nu există o soluţie militară în Siria"
>>> inputs = tokenizer(example_english_phrase, return_tensors="pt) >>> inputs = tokenizer(example_english_phrase, return_tensors="pt")
>>> with tokenizer.as_target_tokenizer(): >>> with tokenizer.as_target_tokenizer():
... labels = tokenizer(expected_translation_romanian, return_tensors="pt") ... labels = tokenizer(expected_translation_romanian, return_tensors="pt")
>>> inputs["labels"] = labels["input_ids"] >>> inputs["labels"] = labels["input_ids"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment