"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "bf493d556990271d1d2bfbce14d89443dc20f927"
Commit 24d5ad1d authored by Lysandre, committed by Lysandre Debut

Run the examples in slow

parent 9ddf60b6
src/transformers/modeling_utils.py
@@ -353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         Examples::

+            # For example purposes. Not runnable.
             model = BertModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
             model = BertModel.from_pretrained('./test/saved_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
             model = BertModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
...
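The third `from_pretrained` call above illustrates that extra keyword arguments are folded into the loaded configuration. A runnable variant of that pattern, as a sketch assuming network access (note: the actual configuration attribute is spelled `output_attentions`, so that spelling is used here)::

    from transformers import BertModel

    # Override a configuration field while loading pretrained weights.
    model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)
    assert model.config.output_attentions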
src/transformers/modeling_xlm.py
@@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel):
         Examples::

+            from transformers import XLMTokenizer, XLMModel
+            import torch
+
             tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
             model = XLMModel.from_pretrained('xlm-mlm-en-2048')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
@@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
         Examples::

+            from transformers import XLMTokenizer, XLMWithLMHeadModel
+            import torch
+
             tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
             model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
@@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
         Examples::

+            from transformers import XLMTokenizer, XLMForSequenceClassification
+            import torch
+
             tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
             model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
@@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
         Examples::

+            from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
+            import torch
+
             tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
             model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             start_positions = torch.tensor([1])
             end_positions = torch.tensor([3])
             outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-            loss, start_scores, end_scores = outputs[:2]
+            loss = outputs[0]
         """
         transformer_outputs = self.transformer(
@@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
         Examples::

+            from transformers import XLMTokenizer, XLMForQuestionAnswering
+            import torch
+
             tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
             model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             start_positions = torch.tensor([1])
             end_positions = torch.tensor([3])
             outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-            loss, start_scores, end_scores = outputs[:2]
+            loss = outputs[0]
         """
         transformer_outputs = self.transformer(
...
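Both XLM question-answering docstrings previously unpacked three names from a two-element slice (`loss, start_scores, end_scores = outputs[:2]`), which raises a `ValueError`; the commit trims this to `loss = outputs[0]`. For the simple head, span scores can still be read by calling the model without positions — a hedged sketch, not part of the diff::

    from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
    import torch

    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
    model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)

    # With no start/end positions, no loss is computed; the simple head's
    # first two outputs are the start and end logits over the sequence.
    start_scores, end_scores = model(input_ids)[:2]
    answer_start = torch.argmax(start_scores)  # most likely answer start index
    answer_end = torch.argmax(end_scores)      # most likely answer end index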
src/transformers/modeling_xlnet.py
@@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel):
         Examples::

+            from transformers import XLNetTokenizer, XLNetModel
+            import torch
+
             tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
             model = XLNetModel.from_pretrained('xlnet-large-cased')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             outputs = model(input_ids)
             last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
@@ -1016,14 +1021,19 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
         Examples::

+            from transformers import XLNetTokenizer, XLNetLMHeadModel
+            import torch
+
             tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
             model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')

             # We show how to setup inputs to predict a next token using a bi-directional context.
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0)  # We will predict the masked token
             perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
             perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
             target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
             target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
             outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
             next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
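The LM head example stops at the logits; a short follow-on sketch (an illustration, not part of the diff) for turning them into a token::

    # target_mapping selected a single position, so the logits have shape
    # [1, 1, vocab_size]; pick the highest-scoring id and decode it.
    predicted_index = torch.argmax(next_token_logits[0, 0]).item()
    predicted_token = tokenizer.decode([predicted_index])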
@@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
         Examples::

+            from transformers import XLNetTokenizer, XLNetForSequenceClassification
+            import torch
+
             tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
             model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
             outputs = model(input_ids, labels=labels)
@@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
         Examples::

+            from transformers import XLNetTokenizer, XLNetForTokenClassification
+            import torch
+
             tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-            model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
+            model = XLNetForTokenClassification.from_pretrained('xlnet-large-cased')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
             labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
             outputs = model(input_ids, labels=labels)
             scores = outputs[0]
         """
@@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
         Examples::

+            from transformers import XLNetTokenizer, XLNetForMultipleChoice
+            import torch
+
             tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
             model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
             choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
             input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
             labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
             outputs = model(input_ids, labels=labels)
             loss, classification_scores = outputs[:2]
@@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
         Examples::

-            tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-            model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
+            from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple
+            import torch
+
+            tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
+            model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             start_positions = torch.tensor([1])
             end_positions = torch.tensor([3])
             outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-            loss, start_scores, end_scores = outputs[:2]
+            loss = outputs[0]
         """
@@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
         Examples::

-            tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-            model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
+            from transformers import XLNetTokenizer, XLNetForQuestionAnswering
+            import torch
+
+            tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
+            model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
             input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
             start_positions = torch.tensor([1])
             end_positions = torch.tensor([3])
             outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-            loss, start_scores, end_scores = outputs[:2]
+            loss = outputs[0]
         """
         transformer_outputs = self.transformer(
...
tests/test_doc_samples.py
@@ -17,7 +17,7 @@ import os
 import unittest
 from typing import List, Union

-from .utils import require_torch
+from .utils import require_tf, require_torch, slow


 def get_examples_from_file(file):
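For orientation, `get_examples_from_file` scans a source file for `Examples::` markers and collects the indented block that follows each one; only fragments of it appear in this diff. A simplified, self-contained re-implementation of that idea — an assumption about the parts the diff does not show::

    def get_examples_from_file_sketch(lines):
        examples, current = [], []
        example_mode, example_indentation = False, 0
        for line in lines:
            if "examples::" in line.lower():
                example_mode = True
                example_indentation = line.lower().find("examples::")
            elif example_mode and line.strip():
                indent = len(line) - len(line.lstrip())
                if indent <= example_indentation:
                    # Dedenting back to the marker level ends the example block.
                    example_mode = False
                    examples.append("\n".join(current))
                    current = []
                else:
                    current.append(line.strip())
        if current:
            examples.append("\n".join(current))
        return examples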
@@ -51,14 +51,19 @@ def get_examples_from_file(file):
         elif "examples::" in line.lower():
             example_mode = True
             example_indentation = line.lower().find("examples::")
-        elif "::" in line.lower():
-            example_mode = True
-            example_indentation = line.lower().find("::")
+        # elif "::" in line.lower() and len(line.strip()) == 2:
+        #     example_mode = True
+        #     example_indentation = line.lower().find("::")

-    return ["\n".join(example) for example in examples]
+    examples = ["\n".join(example) for example in examples]
+    examples = [example for example in examples if "not runnable" not in example.lower()]
+
+    return examples


 @require_torch
+@require_tf
+@slow
 class TestCodeExamples(unittest.TestCase):
     def analyze_directory(
         self, directory: str, identifier: Union[str, None] = None, ignore_files: Union[List[str], None] = None
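The new "not runnable" filter is what gives the `# For example purposes. Not runnable.` marker in modeling_utils.py its effect: any extracted example containing that phrase is dropped before execution. A minimal illustration of the filtering step::

    extracted = [
        "# For example purposes. Not runnable.\nmodel = BertModel.from_pretrained('bert-base-uncased')",
        "import torch\nx = torch.zeros(1)",
    ]
    runnable = [ex for ex in extracted if "not runnable" not in ex.lower()]
    assert len(runnable) == 1  # only the second snippet survives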
@@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase):
         joined_examples = []

         def execute_example(code_example):
-            exec(code_example)
+            exec(code_example, {})

         # Some examples are the continuation of others.
-        if len(examples) > 1:
+        if len(examples) > 0:
             joined_examples.append(examples[0])
             joined_examples_index = 0
             for example in examples[1:]:
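Passing an explicit empty dict to `exec` runs each snippet in fresh globals, so a name defined by one example (say, `model`) can no longer leak into, and mask failures in, the next one; the `len(examples) > 0` change also ensures files with a single example are no longer silently skipped. A standalone illustration of the isolation difference::

    snippet_a = "x = 1"
    snippet_b = "print(x)"

    exec(snippet_a)          # shares this module's namespace
    exec(snippet_b)          # prints 1 only because snippet_a leaked x

    try:
        exec(snippet_b, {})  # isolated: x is undefined in fresh globals
    except NameError as err:
        print("isolated run failed as expected:", err)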
@@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase):
             print("Testing", file, str(len(joined_examples)) + "/" + str(len(joined_examples)))

             # Execute sub tests with every example.
-            with self.subTest(msg=file):
-                [execute_example(code_example) for code_example in joined_examples]
+            for index, code_example in enumerate(joined_examples):
+                with self.subTest(msg=file + " " + str(index) + "/" + str(len(joined_examples)) + code_example):
+                    execute_example(code_example)

     def test_configuration_examples(self):
         transformers_directory = "src/transformers"
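Wrapping each example in its own subTest means one failing snippet no longer aborts the rest of the file's examples; unittest records each subtest failure separately and keeps going. A tiny standalone demonstration of that behaviour::

    import unittest

    class Demo(unittest.TestCase):
        def test_subtests_continue_after_failure(self):
            for i in range(3):
                with self.subTest(index=i):
                    # The failure at i == 1 is recorded, but iteration continues.
                    self.assertNotEqual(i, 1)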
@@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase):
     def test_main_doc_examples(self):
         doc_directory = "docs/source"
         self.analyze_directory(doc_directory)
+
+    def test_modeling_examples(self):
+        transformers_directory = "src/transformers"
+        modeling_files = "modeling"
+        ignore_files = [
+            "modeling_auto.py",
+            "modeling_t5.py",
+            "modeling_tf_auto.py",
+            "modeling_utils.py",
+            "modeling_tf_t5.py",
+        ]
+        self.analyze_directory(transformers_directory, identifier=modeling_files, ignore_files=ignore_files)
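Because of the `@slow` marker, these example tests are skipped on regular CI runs and only execute when slow tests are explicitly enabled. A rough sketch of how such a gate is conventionally implemented (an assumption; the real decorator lives in the test utilities imported at the top of the file)::

    import os
    import unittest

    def slow(test_case):
        # Skip unless the RUN_SLOW environment variable enables slow tests.
        if os.environ.get("RUN_SLOW", "0").lower() not in ("1", "true", "yes"):
            return unittest.skip("test is slow")(test_case)
        return test_case

With a convention like this, the suite would be run as `RUN_SLOW=1 python -m pytest` against the doc-examples test file.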