chenpangpang / transformers · Commits

Commit 36bca545, authored Jul 05, 2019 by thomwolf

    tokenization abstract class - tests for examples

Parent: a4f98054
Showing 20 of 32 changed files, with 466 additions and 48 deletions (+466 -48) on this page.
examples/run_squad.py  +400 -0
examples/test_examples.py  +48 -0
pytorch_transformers/__init__.py  +3 -3
pytorch_transformers/modeling_bert.py  +1 -1
pytorch_transformers/modeling_gpt2.py  +1 -1
pytorch_transformers/modeling_openai.py  +1 -1
pytorch_transformers/modeling_transfo_xl.py  +1 -1
pytorch_transformers/modeling_utils.py  +0 -6
pytorch_transformers/modeling_xlm.py  +1 -1
pytorch_transformers/modeling_xlnet.py  +1 -1
pytorch_transformers/tests/modeling_bert_test.py  +1 -1
pytorch_transformers/tests/modeling_gpt2_test.py  +1 -1
pytorch_transformers/tests/modeling_openai_test.py  +1 -1
pytorch_transformers/tests/modeling_tests_commons.py  +0 -0
pytorch_transformers/tests/modeling_transfo_xl_test.py  +1 -1
pytorch_transformers/tests/modeling_utils_test.py  +1 -8
pytorch_transformers/tests/modeling_xlm_test.py  +1 -1
pytorch_transformers/tests/modeling_xlnet_test.py  +1 -1
pytorch_transformers/tests/tokenization_bert_test.py  +1 -9
pytorch_transformers/tests/tokenization_gpt2_test.py  +1 -10
examples/run_squad.py (new file, mode 100644)

[diff collapsed: the 400 added lines are not shown]
pytorch_transformers/tests/model_utils_test.py → examples/test_examples.py
@@ -16,35 +16,33 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
+import sys
 import unittest
-import json
-import random
-import shutil
-import pytest
-import torch
+import argparse
 
+try:
+    # python 3.4+ can use builtin unittest.mock instead of mock package
+    from unittest.mock import patch
+except ImportError:
+    from mock import patch
 
-from pytorch_transformers import PretrainedConfig, PreTrainedModel
-from pytorch_transformers.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP
+import run_bert_squad as rbs
+
+
+def get_setup_file():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f')
+    args = parser.parse_args()
+    return args.f
 
-class ModelUtilsTest(unittest.TestCase):
-    def test_model_from_pretrained(self):
-        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
-            config = BertConfig.from_pretrained(model_name)
-            self.assertIsNotNone(config)
-            self.assertIsInstance(config, PretrainedConfig)
 
-            model = BertModel.from_pretrained(model_name)
-            self.assertIsNotNone(model)
-            self.assertIsInstance(model, PreTrainedModel)
+class ExamplesTests(unittest.TestCase):
 
-            config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
-            model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
-            self.assertEqual(model.config.output_attentions, True)
-            self.assertEqual(model.config.output_hidden_states, True)
-            self.assertEqual(model.config, config)
+    def test_run_squad(self):
+        testargs = ["prog", "-f", "/home/test/setup.py"]
+        with patch.object(sys, 'argv', testargs):
+            setup = get_setup_file()
+            assert setup == "/home/test/setup.py"
+            # rbs.main()
 
 if __name__ == "__main__":
     unittest.main()
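The new example test avoids spawning a subprocess: it patches sys.argv in place and calls the script's argument-parsing helper directly. A minimal self-contained sketch of the same pattern (the parse_flag helper and the test values are illustrative stand-ins, not code from this commit):

import sys
import unittest
import argparse
from unittest.mock import patch  # standard library on Python 3; the 'mock' package on Python 2

def parse_flag():
    # Hypothetical CLI entry point that reads '-f' from sys.argv.
    parser = argparse.ArgumentParser()
    parser.add_argument('-f')
    return parser.parse_args().f

class ArgvPatchingTest(unittest.TestCase):
    def test_parse_flag(self):
        # patch.object replaces sys.argv only inside the with-block,
        # so argparse sees the fake command line and nothing leaks out.
        with patch.object(sys, 'argv', ["prog", "-f", "setup.py"]):
            self.assertEqual(parse_flag(), "setup.py")

if __name__ == "__main__":
    unittest.main()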
pytorch_transformers/__init__.py

@@ -5,6 +5,7 @@ from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
+from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
 
 from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
                             BertForMaskedLM, BertForNextSentencePrediction,
@@ -26,11 +27,10 @@ from .modeling_xlnet import (XLNetConfig,
 from .modeling_xlm import (XLMConfig, XLMModel, XLMWithLMHeadModel,
                            XLMForSequenceClassification, XLMForQuestionAnswering)
+from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
+                             PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
 from .optimization import BertAdam
 from .optimization_openai import OpenAIAdam
 from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
-from .model_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
-                          PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
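With these changes the new tokenization utilities and the renamed modeling utilities are all re-exported from the package root. For instance (a minimal sketch, assuming the pytorch_transformers package at this commit is installed):

# Names newly exported or renamed by this commit, imported from the package root.
from pytorch_transformers import PreTrainedTokenizer, clean_up_tokenization
from pytorch_transformers import PretrainedConfig, PreTrainedModel, prune_layer, Conv1D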
pytorch_transformers/modeling_bert.py

@@ -29,7 +29,7 @@ from torch import nn
 from torch.nn import CrossEntropyLoss, MSELoss
 
 from .file_utils import cached_path
-from .model_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
+from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
 
 logger = logging.getLogger(__name__)
pytorch_transformers/modeling_gpt2.py

@@ -31,7 +31,7 @@ from torch.nn import CrossEntropyLoss
 from torch.nn.parameter import Parameter
 
 from .file_utils import cached_path
-from .model_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
+from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
                           PreTrainedModel, prune_conv1d_layer, SequenceSummary)
 from .modeling_bert import BertLayerNorm as LayerNorm
pytorch_transformers/modeling_openai.py

@@ -31,7 +31,7 @@ from torch.nn import CrossEntropyLoss
 from torch.nn.parameter import Parameter
 
 from .file_utils import cached_path
-from .model_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
+from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
                           PreTrainedModel, prune_conv1d_layer, SequenceSummary)
 from .modeling_bert import BertLayerNorm as LayerNorm
pytorch_transformers/modeling_transfo_xl.py

@@ -37,7 +37,7 @@ from torch.nn.parameter import Parameter
 from .modeling_bert import BertLayerNorm as LayerNorm
 from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
 from .file_utils import cached_path
-from .model_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
+from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
 
 logger = logging.getLogger(__name__)
pytorch_transformers/model_utils.py → pytorch_transformers/modeling_utils.py

@@ -598,9 +598,3 @@ def prune_layer(layer, index, dim=None):
         return prune_conv1d_layer(layer, index, dim=1 if dim is None else dim)
     else:
         raise ValueError("Can't prune layer of class {}".format(layer.__class__))
-
-
-def clean_up_tokenization(out_string):
-    out_string = out_string.replace(' .', '.').replace(' ?', '?').replace(' !', '!').replace(' ,', ',').replace(" ' ", "'").replace(" n't", "n't"
-        ).replace(" 'm", "'m").replace(" do not", " don't").replace(" 's", "'s").replace(" 've", "'ve").replace(" 're", "'re")
-    return out_string
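The removed helper (now in the new tokenization_utils module and exported from the package root, per the __init__.py diff above) re-attaches punctuation and contractions that whitespace-joined decoding leaves detached. A quick illustration, inferred from the replace rules visible in the removed lines:

from pytorch_transformers import clean_up_tokenization

# Wordpiece-style decoding leaves spaces before punctuation and
# inside contractions; the helper stitches them back together.
print(clean_up_tokenization("he 's right , is n't he ?"))
# -> "he's right, isn't he?"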
pytorch_transformers/modeling_xlm.py

@@ -35,7 +35,7 @@ from torch.nn import functional as F
 from torch.nn import CrossEntropyLoss, MSELoss
 
 from .file_utils import cached_path
-from .model_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
+from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                           prune_linear_layer, SequenceSummary, SQuADHead)
 
 logger = logging.getLogger(__name__)
pytorch_transformers/modeling_xlnet.py

@@ -32,7 +32,7 @@ from torch.nn import functional as F
 from torch.nn import CrossEntropyLoss, MSELoss
 
 from .file_utils import cached_path
-from .model_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
+from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
                           SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
pytorch_transformers/tests/modeling_bert_test.py

@@ -26,7 +26,7 @@ from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
                                   BertForTokenClassification, BertForMultipleChoice)
 from pytorch_transformers.modeling_bert import PRETRAINED_MODEL_ARCHIVE_MAP
-from .model_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
 
 class BertModelTest(unittest.TestCase):
pytorch_transformers/tests/modeling_gpt2_test.py

@@ -28,7 +28,7 @@ import torch
 from pytorch_transformers import (GPT2Config, GPT2Model,
                                   GPT2LMHeadModel, GPT2DoubleHeadsModel)
-from .model_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
+from .modeling_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
 
 class GPT2ModelTest(unittest.TestCase):
pytorch_transformers/tests/modeling_openai_test.py

@@ -24,7 +24,7 @@ import torch
 from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
                                   OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
-from .model_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
+from .modeling_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
 
 class OpenAIModelTest(unittest.TestCase):
pytorch_transformers/tests/model_tests_commons.py → pytorch_transformers/tests/modeling_tests_commons.py

File moved.
pytorch_transformers/tests/modeling_transfo_xl_test.py

@@ -28,7 +28,7 @@ import torch
 from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
 from pytorch_transformers.modeling_transfo_xl import PRETRAINED_MODEL_ARCHIVE_MAP
-from .model_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
+from .modeling_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
 
 class TransfoXLModelTest(unittest.TestCase):
 
     class TransfoXLModelTester(object):
examples/tests/examples_tests.py → pytorch_transformers/tests/modeling_utils_test.py

@@ -16,17 +16,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 import os
 import unittest
-import json
-import random
-import shutil
-import pytest
-
-import torch
 
 from pytorch_transformers import PretrainedConfig, PreTrainedModel
-from pytorch_transformers.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP
+from pytorch_transformers.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP
 
 class ModelUtilsTest(unittest.TestCase):
pytorch_transformers/tests/modeling_xlm_test.py

@@ -23,7 +23,7 @@ import pytest
 from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel,
                                   XLMForQuestionAnswering, XLMForSequenceClassification)
 from pytorch_transformers.modeling_xlm import PRETRAINED_MODEL_ARCHIVE_MAP
-from .model_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
 
 class XLMModelTest(unittest.TestCase):
pytorch_transformers/tests/modeling_xlnet_test.py

@@ -28,7 +28,7 @@ import torch
 from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel,
                                   XLNetForSequenceClassification, XLNetForQuestionAnswering)
 from pytorch_transformers.modeling_xlnet import PRETRAINED_MODEL_ARCHIVE_MAP
-from .model_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
+from .modeling_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
 
 class XLNetModelTest(unittest.TestCase):
 
     class XLNetModelTester(object):
pytorch_transformers/tests/tokenization_bert_test.py

@@ -24,7 +24,7 @@ from pytorch_transformers.tokenization_bert import (BasicTokenizer,
                                                     BertTokenizer, WordpieceTokenizer,
                                                     _is_control, _is_punctuation,
-                                                    _is_whitespace, PRETRAINED_VOCAB_ARCHIVE_MAP)
+                                                    _is_whitespace)
 from .tokenization_tests_commons import create_and_check_tokenizer_commons
@@ -49,14 +49,6 @@ class TokenizationTest(unittest.TestCase):
         os.remove(vocab_file)
 
-    @pytest.mark.slow
-    def test_tokenizer_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
-            tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(tokenizer)
-
     def test_chinese(self):
         tokenizer = BasicTokenizer()
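The deleted download test was tagged @pytest.mark.slow, the usual pytest pattern for tests that are skipped in routine runs and opted into explicitly. A minimal sketch of that pattern (standard pytest marker usage, not code from this repo):

import pytest

@pytest.mark.slow  # deselect with: pytest -m "not slow"
def test_downloads_pretrained_vocab():
    # Placeholder body; a real test would download and check a vocab file.
    assert True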
pytorch_transformers/tests/tokenization_gpt2_test.py

@@ -17,10 +17,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import os
 import unittest
 import json
-import shutil
-import pytest
 
-from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP
+from pytorch_transformers.tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_tests_commons import create_and_check_tokenizer_commons
@@ -56,13 +54,6 @@ class GPT2TokenizationTest(unittest.TestCase):
         os.remove(vocab_file)
         os.remove(merges_file)
 
-    # @pytest.mark.slow
-    def test_tokenizer_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(PRETRAINED_VOCAB_ARCHIVE_MAP.keys())[:1]:
-            tokenizer = GPT2Tokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(tokenizer)
 
 if __name__ == '__main__':
     unittest.main()