Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
24d5ad1d
Commit
24d5ad1d
authored
Jan 22, 2020
by
Lysandre
Committed by
Lysandre Debut
Jan 23, 2020
Browse files
Run the examples in slow
parent
9ddf60b6
Changes
24
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
85 additions
and
18 deletions
+85
-18
src/transformers/modeling_utils.py
src/transformers/modeling_utils.py
+1
-0
src/transformers/modeling_xlm.py
src/transformers/modeling_xlm.py
+17
-2
src/transformers/modeling_xlnet.py
src/transformers/modeling_xlnet.py
+40
-7
tests/test_examples.py
tests/test_examples.py
+27
-9
No files found.
src/transformers/modeling_utils.py
View file @
24d5ad1d
...
...
@@ -353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
Examples::
# For example purposes. Not runnable.
model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading
...
...
src/transformers/modeling_xlm.py
View file @
24d5ad1d
...
...
@@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel):
Examples::
from transformers import XLMTokenizer, XLMModel
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
...
@@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
Examples::
from transformers import XLMTokenizer, XLMWithLMHeadModel
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
...
@@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
Examples::
from transformers import XLMTokenizer, XLMForSequenceClassification
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
...
@@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
Examples::
from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
transformer_outputs
=
self
.
transformer
(
...
...
@@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
Examples::
from transformers import XLMTokenizer, XLMForQuestionAnswering
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
transformer_outputs
=
self
.
transformer
(
...
...
src/transformers/modeling_xlnet.py
View file @
24d5ad1d
...
...
@@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel):
Examples::
from transformers import XLNetTokenizer, XLNetModel
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetModel.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
...
...
@@ -1016,14 +1021,19 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
Examples::
from transformers import XLNetTokenizer, XLNetLMHeadModel
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0) # We will predict the masked token
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
...
...
@@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
Examples::
from transformers import XLNetTokenizer, XLNetForSequenceClassification
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
...
...
@@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
Examples::
from transformers import XLNetTokenizer, XLNetForTokenClassification
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
model = XLNetForTokenClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
scores = outputs[0]
"""
...
...
@@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
Examples::
from transformers import XLNetTokenizer, XLNetForMultipleChoice
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]
...
...
@@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
Examples::
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
...
...
@@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
Examples::
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
from transformers import XLNetTokenizer, XLNetForQuestionAnswering
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
transformer_outputs
=
self
.
transformer
(
...
...
tests/test_examples.py
View file @
24d5ad1d
...
...
@@ -17,7 +17,7 @@ import os
import
unittest
from
typing
import
List
,
Union
from
.utils
import
require_t
orch
from
.utils
import
require_t
f
,
require_torch
,
slow
def
get_examples_from_file
(
file
):
...
...
@@ -51,14 +51,19 @@ def get_examples_from_file(file):
elif
"examples::"
in
line
.
lower
():
example_mode
=
True
example_indentation
=
line
.
lower
().
find
(
"examples::"
)
elif
"::"
in
line
.
lower
():
example_mode
=
True
example_indentation
=
line
.
lower
().
find
(
"::"
)
# elif "::" in line.lower() and len(line.strip()) == 2:
# example_mode = True
# example_indentation = line.lower().find("::")
examples
=
[
"
\n
"
.
join
(
example
)
for
example
in
examples
]
examples
=
[
example
for
example
in
examples
if
"not runnable"
not
in
example
.
lower
()]
return
[
"
\n
"
.
join
(
example
)
for
example
in
examples
]
return
examples
@
require_torch
@
require_tf
@
slow
class
TestCodeExamples
(
unittest
.
TestCase
):
def
analyze_directory
(
self
,
directory
:
str
,
identifier
:
Union
[
str
,
None
]
=
None
,
ignore_files
:
Union
[
List
[
str
],
None
]
=
None
...
...
@@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase):
joined_examples
=
[]
def
execute_example
(
code_example
):
exec
(
code_example
)
exec
(
code_example
,
{}
)
# Some examples are the continuation of others.
if
len
(
examples
)
>
1
:
if
len
(
examples
)
>
0
:
joined_examples
.
append
(
examples
[
0
])
joined_examples_index
=
0
for
example
in
examples
[
1
:]:
...
...
@@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase):
print
(
"Testing"
,
file
,
str
(
len
(
joined_examples
))
+
"/"
+
str
(
len
(
joined_examples
)))
# Execute sub tests with every example.
with
self
.
subTest
(
msg
=
file
):
[
execute_example
(
code_example
)
for
code_example
in
joined_examples
]
for
index
,
code_example
in
enumerate
(
joined_examples
):
with
self
.
subTest
(
msg
=
file
+
" "
+
str
(
index
)
+
"/"
+
str
(
len
(
joined_examples
))
+
code_example
):
execute_example
(
code_example
)
def
test_configuration_examples
(
self
):
transformers_directory
=
"src/transformers"
...
...
@@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase):
def
test_main_doc_examples
(
self
):
doc_directory
=
"docs/source"
self
.
analyze_directory
(
doc_directory
)
def
test_modeling_examples
(
self
):
transformers_directory
=
"src/transformers"
modeling_files
=
"modeling"
ignore_files
=
[
"modeling_auto.py"
,
"modeling_t5.py"
,
"modeling_tf_auto.py"
,
"modeling_utils.py"
,
"modeling_tf_t5.py"
,
]
self
.
analyze_directory
(
transformers_directory
,
identifier
=
modeling_files
,
ignore_files
=
ignore_files
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment