Commit 24d5ad1d authored by Lysandre, committed by Lysandre Debut

Run the examples in slow

parent 9ddf60b6
@@ -353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
Examples::
+ # For example purposes. Not runnable.
model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading
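The block above is tagged "Not runnable" precisely so the new "not runnable" filter added to the test file below skips it. For reference, a runnable counterpart (a sketch, assuming network access to fetch the weights) would be::

    from transformers import BertModel, BertConfig

    model = BertModel.from_pretrained('bert-base-uncased')    # downloads and caches the weights
    model.save_pretrained('./test/saved_model/')              # writes config + weights to disk
    model = BertModel.from_pretrained('./test/saved_model/')  # reloads from the local directory
    assert isinstance(model.config, BertConfig)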
......
@@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel):
Examples::
+ from transformers import XLMTokenizer, XLMModel
+ import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
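The hunk ends before the forward pass; a sketch of the usual continuation (mirroring the XLNetModel example later in this diff, not part of the commit)::

    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # shape (batch_size, seq_len, hidden_size)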
@@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
Examples::
+ from transformers import XLMTokenizer, XLMWithLMHeadModel
+ import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
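Likewise truncated by the hunk; a hedged sketch of the continuation for the LM head, assuming the 2.x tuple outputs::

    outputs = model(input_ids)
    prediction_scores = outputs[0]  # vocabulary logits for every position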
@@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
Examples::
+ from transformers import XLMTokenizer, XLMForSequenceClassification
+ import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
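A sketch of the truncated remainder, following the same pattern the XLNetForSequenceClassification example below uses::

    labels = torch.tensor([1]).unsqueeze(0)   # batch size 1
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]                # loss comes first when labels are supplied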
@@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
Examples::
+ from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
+ import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
- loss, start_scores, end_scores = outputs[:2]
+ loss = outputs[0]
"""
transformer_outputs = self.transformer(
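The replaced line tried to unpack a two-element slice into three names, which raises a ValueError; with start/end positions supplied the loss is the first element, so the fixed example reads only outputs[0]. To get span scores instead, call the model without gold positions (a sketch, assuming the 2.x output layout)::

    outputs = model(input_ids)              # no positions supplied
    start_scores, end_scores = outputs[:2]
    answer_ids = input_ids[0, start_scores.argmax(): end_scores.argmax() + 1]
    print(tokenizer.decode(answer_ids.tolist()))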
@@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
Examples::
+ from transformers import XLMTokenizer, XLMForQuestionAnswering
+ import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
- loss, start_scores, end_scores = outputs[:2]
+ loss = outputs[0]
"""
transformer_outputs = self.transformer(
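Same fix as above, with one difference worth noting: unlike the Simple variant, XLMForQuestionAnswering wraps the beam-search SQuAD head, so with positions supplied the loss is the only tensor returned, and without positions it yields top-k span candidates rather than plain logits. A hedged sketch, assuming the 2.x output order::

    outputs = model(input_ids)  # no positions supplied
    start_top_log_probs, start_top_index = outputs[0], outputs[1]  # top start candidates per example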
......
@@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel):
Examples::
+ from transformers import XLNetTokenizer, XLNetModel
+ import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetModel.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
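As a quick sanity check (not part of the diff), xlnet-large-cased uses a hidden size of 1024::

    print(last_hidden_states.shape)  # torch.Size([1, seq_len, 1024])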
@@ -1016,14 +1021,19 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
Examples::
+ from transformers import XLNetTokenizer, XLNetLMHeadModel
+ import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0) # We will predict the masked token
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
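One further step (a sketch, not part of the commit) turns the logits into an actual token::

    next_token_id = next_token_logits[0, 0].argmax().item()
    print(tokenizer.decode([next_token_id]))  # the model's guess for the masked position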
@@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
Examples::
+ from transformers import XLNetTokenizer, XLNetForSequenceClassification
+ import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
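The hunk stops before the outputs are consumed; the standard continuation (a sketch)::

    loss, logits = outputs[:2]  # loss first, since labels were supplied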
@@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
Examples::
+ from transformers import XLNetTokenizer, XLNetForTokenClassification
+ import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
- model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
+ model = XLNetForTokenClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
scores = outputs[0]
"""
@@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
Examples::
+ from transformers import XLNetTokenizer, XLNetForMultipleChoice
+ import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]
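Note that building input_ids with torch.tensor only works when both choices tokenize to the same length; real inputs generally need padding first (a sketch)::

    encoded = [tokenizer.encode(s, add_special_tokens=True) for s in choices]
    max_len = max(len(e) for e in encoded)
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0
    encoded = [e + [pad_id] * (max_len - len(e)) for e in encoded]
    input_ids = torch.tensor(encoded).unsqueeze(0)  # shape (1, num_choices, seq_len)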
@@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
Examples::
- tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
- model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
+ from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple
+ import torch
+ tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
+ model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
- loss, start_scores, end_scores = outputs[:2]
+ loss = outputs[0]
"""
@@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
Examples::
- tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
- model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
+ from transformers import XLNetTokenizer, XLNetForQuestionAnswering
+ import torch
+ tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
+ model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
- loss, start_scores, end_scores = outputs[:2]
+ loss = outputs[0]
"""
transformer_outputs = self.transformer(
......
@@ -17,7 +17,7 @@ import os
import unittest
from typing import List, Union
- from .utils import require_torch
+ from .utils import require_tf, require_torch, slow
def get_examples_from_file(file):
@@ -51,14 +51,19 @@ def get_examples_from_file(file):
elif "examples::" in line.lower():
example_mode = True
example_indentation = line.lower().find("examples::")
elif "::" in line.lower():
example_mode = True
example_indentation = line.lower().find("::")
# elif "::" in line.lower() and len(line.strip()) == 2:
# example_mode = True
# example_indentation = line.lower().find("::")
examples = ["\n".join(example) for example in examples]
examples = [example for example in examples if "not runnable" not in example.lower()]
return ["\n".join(example) for example in examples]
return examples
@require_torch
+ @require_tf
+ @slow
class TestCodeExamples(unittest.TestCase):
def analyze_directory(
self, directory: str, identifier: Union[str, None] = None, ignore_files: Union[List[str], None] = None
@@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase):
joined_examples = []
def execute_example(code_example):
- exec(code_example)
+ exec(code_example, {})
# Some examples are the continuation of others.
- if len(examples) > 1:
+ if len(examples) > 0:
joined_examples.append(examples[0])
joined_examples_index = 0
for example in examples[1:]:
@@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase):
print("Testing", file, str(len(joined_examples)) + "/" + str(len(joined_examples)))
# Execute sub tests with every example.
with self.subTest(msg=file):
- [execute_example(code_example) for code_example in joined_examples]
+ for index, code_example in enumerate(joined_examples):
+ with self.subTest(msg=file + " " + str(index) + "/" + str(len(joined_examples)) + code_example):
+ execute_example(code_example)
def test_configuration_examples(self):
transformers_directory = "src/transformers"
@@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase):
def test_main_doc_examples(self):
doc_directory = "docs/source"
self.analyze_directory(doc_directory)
+ def test_modeling_examples(self):
+ transformers_directory = "src/transformers"
+ modeling_files = "modeling"
+ ignore_files = [
+ "modeling_auto.py",
+ "modeling_t5.py",
+ "modeling_tf_auto.py",
+ "modeling_utils.py",
+ "modeling_tf_t5.py",
+ ]
+ self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)
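Because the class is now decorated with @slow (and @require_tf), these example tests are skipped by default and only run when the slow suite is enabled. A hedged invocation, assuming the test module lives at tests/test_doc_samples.py::

    RUN_SLOW=1 python -m pytest tests/test_doc_samples.py -k test_modeling_examples -s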