Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
24d5ad1d
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "bf493d556990271d1d2bfbce14d89443dc20f927"
Commit
24d5ad1d
authored
Jan 22, 2020
by
Lysandre
Committed by
Lysandre Debut
Jan 23, 2020
Browse files
Run the examples in slow
parent
9ddf60b6
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
85 additions
and
18 deletions
+85
-18
src/transformers/modeling_utils.py
src/transformers/modeling_utils.py
+1
-0
src/transformers/modeling_xlm.py
src/transformers/modeling_xlm.py
+17
-2
src/transformers/modeling_xlnet.py
src/transformers/modeling_xlnet.py
+40
-7
tests/test_examples.py
tests/test_examples.py
+27
-9
No files found.
src/transformers/modeling_utils.py
View file @
24d5ad1d
...
@@ -353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
...
@@ -353,6 +353,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
Examples::
Examples::
# For example purposes. Not runnable.
model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading
model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading
...
...
src/transformers/modeling_xlm.py
View file @
24d5ad1d
...
@@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel):
...
@@ -437,6 +437,9 @@ class XLMModel(XLMPreTrainedModel):
Examples::
Examples::
from transformers import XLMTokenizer, XLMModel
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
@@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
...
@@ -688,6 +691,9 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
Examples::
Examples::
from transformers import XLMTokenizer, XLMWithLMHeadModel
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
@@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
...
@@ -770,6 +776,9 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
Examples::
Examples::
from transformers import XLMTokenizer, XLMForSequenceClassification
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
...
@@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
...
@@ -869,13 +878,16 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
Examples::
Examples::
from transformers import XLMTokenizer, XLMForQuestionAnsweringSimple
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnsweringSimple.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
"""
transformer_outputs
=
self
.
transformer
(
transformer_outputs
=
self
.
transformer
(
...
@@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
...
@@ -1000,13 +1012,16 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
Examples::
Examples::
from transformers import XLMTokenizer, XLMForQuestionAnswering
import torch
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
"""
transformer_outputs
=
self
.
transformer
(
transformer_outputs
=
self
.
transformer
(
...
...
src/transformers/modeling_xlnet.py
View file @
24d5ad1d
...
@@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel):
...
@@ -735,9 +735,14 @@ class XLNetModel(XLNetPreTrainedModel):
Examples::
Examples::
from transformers import XLNetTokenizer, XLNetModel
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetModel.from_pretrained('xlnet-large-cased')
model = XLNetModel.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
...
@@ -1016,14 +1021,19 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
...
@@ -1016,14 +1021,19 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
Examples::
Examples::
from transformers import XLNetTokenizer, XLNetLMHeadModel
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
# We show how to setup inputs to predict a next token using a bi-directional context.
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0) # We will predict the masked token
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True)).unsqueeze(0) # We will predict the masked token
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
...
@@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
...
@@ -1114,8 +1124,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
Examples::
Examples::
from transformers import XLNetTokenizer, XLNetForSequenceClassification
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
outputs = model(input_ids, labels=labels)
...
@@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
...
@@ -1212,11 +1226,16 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
Examples::
Examples::
from transformers import XLNetTokenizer, XLNetForTokenClassification
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
model = XLNetForTokenClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
outputs = model(input_ids, labels=labels)
scores = outputs[0]
scores = outputs[0]
"""
"""
...
@@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
...
@@ -1314,11 +1333,16 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
Examples::
Examples::
from transformers import XLNetTokenizer, XLNetForMultipleChoice
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
model = XLNetForMultipleChoice.from_pretrained('xlnet-base-cased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]
loss, classification_scores = outputs[:2]
...
@@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
...
@@ -1425,13 +1449,18 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
Examples::
Examples::
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
from transformers import XLNetTokenizer, XLNetForQuestionAnsweringSimple
model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
"""
...
@@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
...
@@ -1560,13 +1589,17 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
Examples::
Examples::
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
from transformers import XLNetTokenizer, XLNetForQuestionAnswering
model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
end_positions = torch.tensor([3])
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
loss
, start_scores, end_scores
= outputs[
:2
]
loss = outputs[
0
]
"""
"""
transformer_outputs
=
self
.
transformer
(
transformer_outputs
=
self
.
transformer
(
...
...
tests/test_examples.py
View file @
24d5ad1d
...
@@ -17,7 +17,7 @@ import os
...
@@ -17,7 +17,7 @@ import os
import
unittest
import
unittest
from
typing
import
List
,
Union
from
typing
import
List
,
Union
from
.utils
import
require_t
orch
from
.utils
import
require_t
f
,
require_torch
,
slow
def
get_examples_from_file
(
file
):
def
get_examples_from_file
(
file
):
...
@@ -51,14 +51,19 @@ def get_examples_from_file(file):
...
@@ -51,14 +51,19 @@ def get_examples_from_file(file):
elif
"examples::"
in
line
.
lower
():
elif
"examples::"
in
line
.
lower
():
example_mode
=
True
example_mode
=
True
example_indentation
=
line
.
lower
().
find
(
"examples::"
)
example_indentation
=
line
.
lower
().
find
(
"examples::"
)
elif
"::"
in
line
.
lower
():
# elif "::" in line.lower() and len(line.strip()) == 2:
example_mode
=
True
# example_mode = True
example_indentation
=
line
.
lower
().
find
(
"::"
)
# example_indentation = line.lower().find("::")
examples
=
[
"
\n
"
.
join
(
example
)
for
example
in
examples
]
examples
=
[
example
for
example
in
examples
if
"not runnable"
not
in
example
.
lower
()]
return
[
"
\n
"
.
join
(
example
)
for
example
in
examples
]
return
examples
@
require_torch
@
require_torch
@
require_tf
@
slow
class
TestCodeExamples
(
unittest
.
TestCase
):
class
TestCodeExamples
(
unittest
.
TestCase
):
def
analyze_directory
(
def
analyze_directory
(
self
,
directory
:
str
,
identifier
:
Union
[
str
,
None
]
=
None
,
ignore_files
:
Union
[
List
[
str
],
None
]
=
None
self
,
directory
:
str
,
identifier
:
Union
[
str
,
None
]
=
None
,
ignore_files
:
Union
[
List
[
str
],
None
]
=
None
...
@@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase):
...
@@ -79,10 +84,10 @@ class TestCodeExamples(unittest.TestCase):
joined_examples
=
[]
joined_examples
=
[]
def
execute_example
(
code_example
):
def
execute_example
(
code_example
):
exec
(
code_example
)
exec
(
code_example
,
{}
)
# Some examples are the continuation of others.
# Some examples are the continuation of others.
if
len
(
examples
)
>
1
:
if
len
(
examples
)
>
0
:
joined_examples
.
append
(
examples
[
0
])
joined_examples
.
append
(
examples
[
0
])
joined_examples_index
=
0
joined_examples_index
=
0
for
example
in
examples
[
1
:]:
for
example
in
examples
[
1
:]:
...
@@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase):
...
@@ -97,8 +102,9 @@ class TestCodeExamples(unittest.TestCase):
print
(
"Testing"
,
file
,
str
(
len
(
joined_examples
))
+
"/"
+
str
(
len
(
joined_examples
)))
print
(
"Testing"
,
file
,
str
(
len
(
joined_examples
))
+
"/"
+
str
(
len
(
joined_examples
)))
# Execute sub tests with every example.
# Execute sub tests with every example.
with
self
.
subTest
(
msg
=
file
):
for
index
,
code_example
in
enumerate
(
joined_examples
):
[
execute_example
(
code_example
)
for
code_example
in
joined_examples
]
with
self
.
subTest
(
msg
=
file
+
" "
+
str
(
index
)
+
"/"
+
str
(
len
(
joined_examples
))
+
code_example
):
execute_example
(
code_example
)
def
test_configuration_examples
(
self
):
def
test_configuration_examples
(
self
):
transformers_directory
=
"src/transformers"
transformers_directory
=
"src/transformers"
...
@@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase):
...
@@ -109,3 +115,15 @@ class TestCodeExamples(unittest.TestCase):
def
test_main_doc_examples
(
self
):
def
test_main_doc_examples
(
self
):
doc_directory
=
"docs/source"
doc_directory
=
"docs/source"
self
.
analyze_directory
(
doc_directory
)
self
.
analyze_directory
(
doc_directory
)
def
test_modeling_examples
(
self
):
transformers_directory
=
"src/transformers"
modeling_files
=
"modeling"
ignore_files
=
[
"modeling_auto.py"
,
"modeling_t5.py"
,
"modeling_tf_auto.py"
,
"modeling_utils.py"
,
"modeling_tf_t5.py"
,
]
self
.
analyze_directory
(
transformers_directory
,
identifier
=
modeling_files
,
ignore_files
=
ignore_files
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment