chenpangpang / transformers · Commits

Commit d5712f7c (unverified)
Authored Dec 12, 2019 by Thomas Wolf; committed by GitHub on Dec 12, 2019

Merge branch 'master' into check-link-validity

Parents: f230d91b, 9c58b236
Changes: showing 20 of 98 changed files, with 386 additions and 88 deletions (+386 / -88).
transformers/tests/modeling_roberta_test.py (+12 / -10)
transformers/tests/modeling_tf_albert_test.py (+3 / -4)
transformers/tests/modeling_tf_auto_test.py (+12 / -7)
transformers/tests/modeling_tf_bert_test.py (+3 / -4)
transformers/tests/modeling_tf_common_test.py (+4 / -4)
transformers/tests/modeling_tf_ctrl_test.py (+3 / -4)
transformers/tests/modeling_tf_distilbert_test.py (+3 / -4)
transformers/tests/modeling_tf_gpt2_test.py (+3 / -4)
transformers/tests/modeling_tf_openai_gpt_test.py (+3 / -4)
transformers/tests/modeling_tf_roberta_test.py (+9 / -10)
transformers/tests/modeling_tf_transfo_xl_test.py (+3 / -4)
transformers/tests/modeling_tf_xlm_test.py (+3 / -4)
transformers/tests/modeling_tf_xlnet_test.py (+5 / -5)
transformers/tests/modeling_transfo_xl_test.py (+6 / -4)
transformers/tests/modeling_xlm_test.py (+8 / -4)
transformers/tests/modeling_xlnet_test.py (+14 / -7)
transformers/tests/optimization_test.py (+3 / -3)
transformers/tests/optimization_tf_test.py (+90 / -0)
transformers/tests/tokenization_auto_test.py (+8 / -2)
transformers/tests/tokenization_bert_japanese_test.py (+191 / -0)
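The common thread in the diffs below is a test-infrastructure change: module-level pytest skips (pytestmark = pytest.mark.skip(...)) and @pytest.mark.slow markers are replaced by plain-unittest decorators imported from .utils (require_torch, require_tf, slow) plus a torch_device constant, so the test files no longer need to import pytest. The .utils module itself is not part of this excerpt; the following is a minimal sketch of how such decorators can be written with unittest alone. The RUN_SLOW environment variable gate and the SMALL_MODEL_IDENTIFIER value are assumptions, not taken from this diff.

    import os
    import unittest

    from transformers import is_tf_available, is_torch_available

    # Assumed identifier of a tiny hub checkpoint used by the new download test.
    SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"


    def slow(test_case):
        """Skip a test unless slow tests are enabled (assumed RUN_SLOW=1 gate)."""
        if not os.environ.get("RUN_SLOW"):
            return unittest.skip("test is slow")(test_case)
        return test_case


    def require_torch(test_case):
        """Skip a test (or a whole TestCase class) when PyTorch is unavailable."""
        if not is_torch_available():
            return unittest.skip("test requires PyTorch")(test_case)
        return test_case


    def require_tf(test_case):
        """Skip a test (or a whole TestCase class) when TensorFlow is unavailable."""
        if not is_tf_available():
            return unittest.skip("test requires TensorFlow")(test_case)
        return test_case


    if is_torch_available():
        import torch
        # Run the PyTorch tests on GPU when one is present, otherwise on CPU.
        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        torch_device = None

Because unittest.skip works on classes as well as functions, a single @require_torch or @require_tf on the test class replaces the old module-level pytestmark, which only took effect under the pytest runner.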
transformers/tests/modeling_roberta_test.py (view file @ d5712f7c)

@@ -18,7 +18,6 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest

 from transformers import is_torch_available

@@ -27,13 +26,13 @@ if is_torch_available():
     from transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM,
                               RobertaForSequenceClassification, RobertaForTokenClassification)
     from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
-else:
-    pytestmark = pytest.mark.skip("Require Torch")

 from .modeling_common_test import (CommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_torch, slow, torch_device


+@require_torch
 class RobertaModelTest(CommonTestCases.CommonModelTester):

     all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()

@@ -129,6 +128,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels,
                                        token_labels, choice_labels):
         model = RobertaModel(config=config)
+        model.to(torch_device)
         model.eval()
         sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
         sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)

@@ -146,6 +146,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels,
                                                token_labels, choice_labels):
         model = RobertaForMaskedLM(config=config)
+        model.to(torch_device)
         model.eval()
         loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
         result = {

@@ -161,6 +162,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                                                      sequence_labels, token_labels, choice_labels):
         config.num_labels = self.num_labels
         model = RobertaForTokenClassification(config=config)
+        model.to(torch_device)
         model.eval()
         loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids,
                              labels=token_labels)

@@ -195,7 +197,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_roberta_for_masked_lm(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:

@@ -207,10 +209,10 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
 class RobertaModelIntegrationTest(unittest.TestCase):

-    @pytest.mark.slow
+    @slow
     def test_inference_masked_lm(self):
         model = RobertaForMaskedLM.from_pretrained('roberta-base')

         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         expected_shape = torch.Size((1, 11, 50265))

@@ -228,10 +230,10 @@ class RobertaModelIntegrationTest(unittest.TestCase):
             torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3)
         )

-    @pytest.mark.slow
+    @slow
     def test_inference_no_head(self):
         model = RobertaModel.from_pretrained('roberta-base')

         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         # compare the actual values for a slice.

@@ -244,10 +246,10 @@ class RobertaModelIntegrationTest(unittest.TestCase):
             torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3)
         )

-    @pytest.mark.slow
+    @slow
     def test_inference_classification_head(self):
         model = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli')

         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         expected_shape = torch.Size((1, 3))
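The recurring "+ model.to(torch_device)" line above (and in the PyTorch diffs further down) is what makes these tests device-agnostic: parameters are moved to whatever device torch_device resolved to, and all input tensors must live on the same device. A minimal self-contained illustration of the pattern; the toy linear module is hypothetical, a stand-in for RobertaModel(config=config):

    import torch

    torch_device = "cuda" if torch.cuda.is_available() else "cpu"

    model = torch.nn.Linear(8, 2)   # stand-in for a real transformer model
    model.to(torch_device)          # move all parameters/buffers to the target device
    model.eval()                    # disable dropout so outputs are deterministic

    inputs = torch.randn(1, 8, device=torch_device)  # inputs created on the same device
    with torch.no_grad():
        output = model(inputs)
    assert output.device.type == torch.device(torch_device).type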
transformers/tests/modeling_tf_albert_test.py (view file @ d5712f7c)

@@ -18,11 +18,11 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import sys

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import AlbertConfig, is_tf_available

@@ -31,10 +31,9 @@ if is_tf_available():
     from transformers.modeling_tf_albert import (TFAlbertModel, TFAlbertForMaskedLM,
                                                  TFAlbertForSequenceClassification,
                                                  TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (

@@ -216,7 +215,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
         self.model_tester.create_and_check_albert_for_sequence_classification(
             *config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         # for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_auto_test.py (view file @ d5712f7c)

@@ -18,11 +18,12 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import logging

 from transformers import is_tf_available
+from .utils import require_tf, slow, SMALL_MODEL_IDENTIFIER
+
 if is_tf_available():
     from transformers import (AutoConfig, BertConfig,
                               TFAutoModel, TFBertModel,

@@ -33,12 +34,11 @@ if is_tf_available():
 from .modeling_common_test import (CommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFAutoModelTest(unittest.TestCase):
-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         import h5py
         self.assertTrue(h5py.version.hdf5_version.startswith("1.10"))

@@ -54,7 +54,7 @@ class TFAutoModelTest(unittest.TestCase):
             self.assertIsNotNone(model)
             self.assertIsInstance(model, TFBertModel)

-    @pytest.mark.slow
+    @slow
     def test_lmhead_model_from_pretrained(self):
         logging.basicConfig(level=logging.INFO)
         # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:

@@ -67,7 +67,7 @@ class TFAutoModelTest(unittest.TestCase):
             self.assertIsNotNone(model)
             self.assertIsInstance(model, TFBertForMaskedLM)

-    @pytest.mark.slow
+    @slow
     def test_sequence_classification_model_from_pretrained(self):
         logging.basicConfig(level=logging.INFO)
         # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:

@@ -80,7 +80,7 @@ class TFAutoModelTest(unittest.TestCase):
             self.assertIsNotNone(model)
             self.assertIsInstance(model, TFBertForSequenceClassification)

-    @pytest.mark.slow
+    @slow
     def test_question_answering_model_from_pretrained(self):
         logging.basicConfig(level=logging.INFO)
         # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:

@@ -93,6 +93,11 @@ class TFAutoModelTest(unittest.TestCase):
             self.assertIsNotNone(model)
             self.assertIsInstance(model, TFBertForQuestionAnswering)

+    def test_from_pretrained_identifier(self):
+        logging.basicConfig(level=logging.INFO)
+        model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER, force_download=True)
+        self.assertIsInstance(model, TFBertForMaskedLM)
+
 if __name__ == "__main__":
     unittest.main()
transformers/tests/modeling_tf_bert_test.py (view file @ d5712f7c)

@@ -18,11 +18,11 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import sys

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import BertConfig, is_tf_available

@@ -36,10 +36,9 @@ if is_tf_available():
                                                TFBertForTokenClassification,
                                                TFBertForQuestionAnswering,
                                                TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFBertModel, TFBertForMaskedLM, TFBertForNextSentencePrediction,

@@ -309,7 +308,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_common_test.py (view file @ d5712f7c)

@@ -25,18 +25,17 @@ import unittest

 import uuid
 import tempfile
-import pytest
 import sys

 from transformers import is_tf_available, is_torch_available
+from .utils import require_tf, slow

 if is_tf_available():
     import tensorflow as tf
     import numpy as np

     from transformers import TFPreTrainedModel
     # from transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")

 if sys.version_info[0] == 2:
     import cPickle as pickle

@@ -62,6 +61,7 @@ def _config_zero_init(config):

 class TFCommonTestCases:

+    @require_tf
     class TFCommonModelTester(unittest.TestCase):

         model_tester = None

@@ -164,7 +164,7 @@ class TFCommonTestCases:
         for model_class in self.all_model_classes:
             # Prepare our model
             model = model_class(config)
             # Let's load it from the disk to be sure we can use pretrained weights
             with TemporaryDirectory() as tmpdirname:
                 outputs = model(inputs_dict)  # build the model
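The "# build the model" comment in the hunk above reflects a Keras detail worth spelling out: a subclassed tf.keras.Model creates its variables lazily, on the first call, so it must be run on real inputs once before its weights can be saved or loaded. A minimal sketch of that build-before-load pattern; the TinyModel class and the /tmp path are hypothetical:

    import tensorflow as tf

    class TinyModel(tf.keras.Model):
        def __init__(self):
            super(TinyModel, self).__init__()
            self.dense = tf.keras.layers.Dense(2)

        def call(self, inputs):
            return self.dense(inputs)

    model = TinyModel()
    _ = model(tf.ones((1, 4)))           # first call builds the variables
    model.save_weights("/tmp/tiny.h5")   # now there is something to save

    restored = TinyModel()
    _ = restored(tf.ones((1, 4)))        # build again before loading weights
    restored.load_weights("/tmp/tiny.h5")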
transformers/tests/modeling_tf_ctrl_test.py (view file @ d5712f7c)

@@ -18,11 +18,11 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import sys

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import CTRLConfig, is_tf_available

@@ -30,10 +30,9 @@ if is_tf_available():
     import tensorflow as tf

     from transformers.modeling_tf_ctrl import (TFCTRLModel, TFCTRLLMHeadModel,
                                                TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()

@@ -188,7 +187,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_ctrl_lm_head(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_distilbert_test.py (view file @ d5712f7c)

@@ -17,10 +17,10 @@ from __future__ import division

 from __future__ import print_function

 import unittest
-import pytest

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import DistilBertConfig, is_tf_available

@@ -30,10 +30,9 @@ if is_tf_available():
                                                      TFDistilBertForMaskedLM,
                                                      TFDistilBertForQuestionAnswering,
                                                      TFDistilBertForSequenceClassification)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering,

@@ -210,7 +209,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_distilbert_for_sequence_classification(*config_and_inputs)

-    # @pytest.mark.slow
+    # @slow
     # def test_model_from_pretrained(self):
     #     cache_dir = "/tmp/transformers_test/"
     #     for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_gpt2_test.py (view file @ d5712f7c)

@@ -18,11 +18,11 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import sys

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import GPT2Config, is_tf_available

@@ -31,10 +31,9 @@ if is_tf_available():
     from transformers.modeling_tf_gpt2 import (TFGPT2Model, TFGPT2LMHeadModel,
                                                TFGPT2DoubleHeadsModel,
                                                TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel,

@@ -219,7 +218,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_openai_gpt_test.py (view file @ d5712f7c)

@@ -18,11 +18,11 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest
 import sys

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import OpenAIGPTConfig, is_tf_available

@@ -31,10 +31,9 @@ if is_tf_available():
     from transformers.modeling_tf_openai import (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel,
                                                  TFOpenAIGPTDoubleHeadsModel,
                                                  TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel,

@@ -218,7 +217,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_roberta_test.py (view file @ d5712f7c)

@@ -18,10 +18,10 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import RobertaConfig, is_tf_available

@@ -32,10 +32,9 @@ if is_tf_available():
                                                  TFRobertaForSequenceClassification,
                                                  TFRobertaForTokenClassification,
                                                  TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFRobertaModel, TFRobertaForMaskedLM,

@@ -191,7 +190,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_roberta_for_masked_lm(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:

@@ -203,10 +202,10 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
 class TFRobertaModelIntegrationTest(unittest.TestCase):

-    @pytest.mark.slow
+    @slow
     def test_inference_masked_lm(self):
         model = TFRobertaForMaskedLM.from_pretrained('roberta-base')

         input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         expected_shape = [1, 11, 50265]

@@ -224,10 +223,10 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
             numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
         )

-    @pytest.mark.slow
+    @slow
     def test_inference_no_head(self):
         model = TFRobertaModel.from_pretrained('roberta-base')

         input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         # compare the actual values for a slice.

@@ -240,10 +239,10 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
             numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
         )

-    @pytest.mark.slow
+    @slow
     def test_inference_classification_head(self):
         model = TFRobertaForSequenceClassification.from_pretrained('roberta-large-mnli')

         input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         expected_shape = [1, 3]
transformers/tests/modeling_tf_transfo_xl_test.py (view file @ d5712f7c)

@@ -19,10 +19,10 @@ from __future__ import print_function

 import unittest
 import random
 import shutil
-import pytest

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow

 from transformers import TransfoXLConfig, is_tf_available

@@ -31,10 +31,9 @@ if is_tf_available():
     from transformers.modeling_tf_transfo_xl import (TFTransfoXLModel,
                                                      TFTransfoXLLMHeadModel,
                                                      TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")


+@require_tf
 class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()

@@ -204,7 +203,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_transfo_xl_lm_head(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_xlm_test.py (view file @ d5712f7c)

@@ -18,7 +18,6 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest

 from transformers import is_tf_available

@@ -29,13 +28,13 @@ if is_tf_available():
                                              TFXLMForSequenceClassification,
                                              TFXLMForQuestionAnsweringSimple,
                                              TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow


+@require_tf
 class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFXLMModel, TFXLMWithLMHeadModel,

@@ -251,7 +250,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlm_sequence_classif(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_tf_xlnet_test.py (view file @ d5712f7c)

@@ -21,7 +21,6 @@ import unittest

 import json
 import random
 import shutil
-import pytest

 from transformers import XLNetConfig, is_tf_available

@@ -33,12 +32,13 @@ if is_tf_available():
                                                TFXLNetForTokenClassification,
                                                TFXLNetForQuestionAnsweringSimple,
                                                TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
-else:
-    pytestmark = pytest.mark.skip("Require TensorFlow")

 from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_tf, slow


+@require_tf
 class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):

     all_model_classes = (TFXLNetModel, TFXLNetLMHeadModel,

@@ -304,7 +304,7 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
     def test_xlnet_lm_head(self):
         self.model_tester.set_seed()
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

     def test_xlnet_sequence_classif(self):
         self.model_tester.set_seed()

@@ -320,7 +320,7 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_transfo_xl_test.py (view file @ d5712f7c)

@@ -19,7 +19,6 @@ from __future__ import print_function

 import unittest
 import random
 import shutil
-import pytest

 from transformers import is_torch_available

@@ -27,12 +26,13 @@ if is_torch_available():
     import torch

     from transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
     from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
-else:
-    pytestmark = pytest.mark.skip("Require Torch")

 from .modeling_common_test import (CommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_torch, slow, torch_device


+@require_torch
 class TransfoXLModelTest(CommonTestCases.CommonModelTester):

     all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel) if is_torch_available() else ()

@@ -111,6 +111,7 @@ class TransfoXLModelTest(CommonTestCases.CommonModelTester):
     def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
         model = TransfoXLModel(config)
+        model.to(torch_device)
         model.eval()

         hidden_states_1, mems_1 = model(input_ids_1)

@@ -140,6 +141,7 @@ class TransfoXLModelTest(CommonTestCases.CommonModelTester):
     def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
         model = TransfoXLLMHeadModel(config)
+        model.to(torch_device)
         model.eval()

         lm_logits_1, mems_1 = model(input_ids_1)

@@ -204,7 +206,7 @@ class TransfoXLModelTest(CommonTestCases.CommonModelTester):
         output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
         self.model_tester.check_transfo_xl_lm_head_output(output_result)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
transformers/tests/modeling_xlm_test.py (view file @ d5712f7c)

@@ -18,7 +18,6 @@ from __future__ import print_function

 import unittest
 import shutil
-import pytest

 from transformers import is_torch_available

@@ -26,13 +25,13 @@ if is_torch_available():
     from transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering,
                               XLMForSequenceClassification, XLMForQuestionAnsweringSimple)
     from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
-else:
-    pytestmark = pytest.mark.skip("Require Torch")

 from .modeling_common_test import (CommonTestCases, ids_tensor)
 from .configuration_common_test import ConfigTester
+from .utils import require_torch, slow, torch_device


+@require_torch
 class XLMModelTest(CommonTestCases.CommonModelTester):

     all_model_classes = (XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering,

@@ -148,6 +147,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlm_model(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
         model = XLMModel(config=config)
+        model.to(torch_device)
         model.eval()
         outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids)
         outputs = model(input_ids, langs=token_type_ids)

@@ -163,6 +163,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlm_lm_head(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
         model = XLMWithLMHeadModel(config)
+        model.to(torch_device)
         model.eval()

         loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels)

@@ -182,6 +183,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlm_simple_qa(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
         model = XLMForQuestionAnsweringSimple(config)
+        model.to(torch_device)
         model.eval()

         outputs = model(input_ids)

@@ -206,6 +208,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlm_qa(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
         model = XLMForQuestionAnswering(config)
+        model.to(torch_device)
         model.eval()

         outputs = model(input_ids)

@@ -260,6 +263,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlm_sequence_classif(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
         model = XLMForSequenceClassification(config)
+        model.to(torch_device)
         model.eval()

         (logits,) = model(input_ids)

@@ -312,7 +316,7 @@ class XLMModelTest(CommonTestCases.CommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlm_sequence_classif(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
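The XLNet diff that follows goes one step further than model.to(torch_device): mask tensors are created with an explicit device= argument instead of being allocated on CPU and moved afterwards. Creating a tensor directly on its target device avoids a host-to-device copy; a small sketch of the difference (the shapes here are arbitrary):

    import torch

    torch_device = "cuda" if torch.cuda.is_available() else "cpu"

    # Allocate on CPU, then copy to the device (an extra transfer):
    perm_mask_cpu = torch.zeros(2, 5, 5, dtype=torch.float)
    perm_mask = perm_mask_cpu.to(torch_device)

    # Allocate directly on the device (what the diff below switches to):
    perm_mask = torch.zeros(2, 5, 5, dtype=torch.float, device=torch_device)
    perm_mask[:, :, -1] = 1.0  # previous positions cannot attend to the last token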
transformers/tests/modeling_xlnet_test.py
View file @
d5712f7c
...
@@ -21,7 +21,6 @@ import unittest
...
@@ -21,7 +21,6 @@ import unittest
import
json
import
json
import
random
import
random
import
shutil
import
shutil
import
pytest
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
...
@@ -31,12 +30,13 @@ if is_torch_available():
...
@@ -31,12 +30,13 @@ if is_torch_available():
from
transformers
import
(
XLNetConfig
,
XLNetModel
,
XLNetLMHeadModel
,
XLNetForSequenceClassification
,
from
transformers
import
(
XLNetConfig
,
XLNetModel
,
XLNetLMHeadModel
,
XLNetForSequenceClassification
,
XLNetForTokenClassification
,
XLNetForQuestionAnswering
)
XLNetForTokenClassification
,
XLNetForQuestionAnswering
)
from
transformers.modeling_xlnet
import
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
from
transformers.modeling_xlnet
import
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
else
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
@
require_torch
class
XLNetModelTest
(
CommonTestCases
.
CommonModelTester
):
class
XLNetModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
XLNetModel
,
XLNetLMHeadModel
,
XLNetForTokenClassification
,
all_model_classes
=
(
XLNetModel
,
XLNetLMHeadModel
,
XLNetForTokenClassification
,
...
@@ -100,9 +100,9 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
...
@@ -100,9 +100,9 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
input_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
2
).
float
()
input_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
2
).
float
()
input_ids_q
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
+
1
],
self
.
vocab_size
)
input_ids_q
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
+
1
],
self
.
vocab_size
)
perm_mask
=
torch
.
zeros
(
self
.
batch_size
,
self
.
seq_length
+
1
,
self
.
seq_length
+
1
,
dtype
=
torch
.
float
)
perm_mask
=
torch
.
zeros
(
self
.
batch_size
,
self
.
seq_length
+
1
,
self
.
seq_length
+
1
,
dtype
=
torch
.
float
,
device
=
torch_device
)
perm_mask
[:,
:,
-
1
]
=
1.0
# Previous tokens don't see last token
perm_mask
[:,
:,
-
1
]
=
1.0
# Previous tokens don't see last token
target_mapping
=
torch
.
zeros
(
self
.
batch_size
,
1
,
self
.
seq_length
+
1
,
dtype
=
torch
.
float
)
target_mapping
=
torch
.
zeros
(
self
.
batch_size
,
1
,
self
.
seq_length
+
1
,
dtype
=
torch
.
float
,
device
=
torch_device
)
target_mapping
[:,
0
,
-
1
]
=
1.0
# predict last token
target_mapping
[:,
0
,
-
1
]
=
1.0
# predict last token
sequence_labels
=
None
sequence_labels
=
None
...
@@ -141,6 +141,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
...
@@ -141,6 +141,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
def
create_and_check_xlnet_base_model
(
self
,
config
,
input_ids_1
,
input_ids_2
,
input_ids_q
,
perm_mask
,
input_mask
,
def
create_and_check_xlnet_base_model
(
self
,
config
,
input_ids_1
,
input_ids_2
,
input_ids_q
,
perm_mask
,
input_mask
,
target_mapping
,
segment_ids
,
lm_labels
,
sequence_labels
,
is_impossible_labels
,
token_labels
):
target_mapping
,
segment_ids
,
lm_labels
,
sequence_labels
,
is_impossible_labels
,
token_labels
):
model
=
XLNetModel
(
config
)
model
=
XLNetModel
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
_
,
_
=
model
(
input_ids_1
,
input_mask
=
input_mask
)
_
,
_
=
model
(
input_ids_1
,
input_mask
=
input_mask
)
...
@@ -155,6 +156,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
...
@@ -155,6 +156,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
config
.
mem_len
=
0
config
.
mem_len
=
0
model
=
XLNetModel
(
config
)
model
=
XLNetModel
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
no_mems_outputs
=
model
(
input_ids_1
)
no_mems_outputs
=
model
(
input_ids_1
)
self
.
parent
.
assertEqual
(
len
(
no_mems_outputs
),
1
)
self
.
parent
.
assertEqual
(
len
(
no_mems_outputs
),
1
)
...
@@ -169,6 +171,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlnet_base_model_with_att_output(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                                           target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels, token_labels):
         model = XLNetModel(config)
+        model.to(torch_device)
         model.eval()

         _, _, attentions = model(input_ids_1, target_mapping=target_mapping)
...
@@ -181,6 +184,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlnet_lm_head(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                        target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels, token_labels):
         model = XLNetLMHeadModel(config)
+        model.to(torch_device)
         model.eval()

         loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)
...
@@ -221,6 +225,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlnet_qa(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                   target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels, token_labels):
         model = XLNetForQuestionAnswering(config)
+        model.to(torch_device)
         model.eval()

         outputs = model(input_ids_1)
...
@@ -279,6 +284,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlnet_token_classif(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                              target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels, token_labels):
         model = XLNetForTokenClassification(config)
+        model.to(torch_device)
         model.eval()

         logits, mems_1 = model(input_ids_1)
...
@@ -311,6 +317,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def create_and_check_xlnet_sequence_classif(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                                 target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels, token_labels):
         model = XLNetForSequenceClassification(config)
+        model.to(torch_device)
         model.eval()

         logits, mems_1 = model(input_ids_1)
...
@@ -362,7 +369,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
     def test_xlnet_lm_head(self):
         self.model_tester.set_seed()
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

     def test_xlnet_sequence_classif(self):
         self.model_tester.set_seed()
...
@@ -379,7 +386,7 @@ class XLNetModelTest(CommonTestCases.CommonModelTester):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

-    @pytest.mark.slow
+    @slow
     def test_model_from_pretrained(self):
         cache_dir = "/tmp/transformers_test/"
         for model_name in list(XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
...
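Every hunk above follows the same pattern: construct the model, then model.to(torch_device) before model.eval(), so the whole suite can run on GPU when one is present. torch_device itself is defined outside this diff, in the tests' utils module; a plausible definition, stated here as an assumption rather than the file's verbatim contents:

import torch

# Assumed helper: prefer CUDA when available so the same tests
# exercise GPU code paths without any per-test changes.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"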
transformers/tests/optimization_test.py
View file @ d5712f7c
...
@@ -18,7 +18,6 @@ from __future__ import print_function
 import unittest
 import os
-import pytest

 from transformers import is_torch_available
...
@@ -31,10 +30,9 @@ if is_torch_available():
                               get_cosine_schedule_with_warmup,
                               get_cosine_with_hard_restarts_schedule_with_warmup,
                               get_linear_schedule_with_warmup)
-else:
-    pytestmark = pytest.mark.skip("Require Torch")

 from .tokenization_tests_commons import TemporaryDirectory
+from .utils import require_torch

 def unwrap_schedule(scheduler, num_steps=10):
...
@@ -58,6 +56,7 @@ def unwrap_and_save_reload_schedule(scheduler, num_steps=10):
         scheduler.load_state_dict(state_dict)
     return lrs

+@require_torch
 class OptimizationTest(unittest.TestCase):

     def assertListAlmostEqual(self, list1, list2, tol):
...
@@ -80,6 +79,7 @@ class OptimizationTest(unittest.TestCase):
         self.assertListAlmostEqual(w.tolist(), [0.4, 0.2, -0.5], tol=1e-2)

+@require_torch
 class ScheduleInitTest(unittest.TestCase):
     m = torch.nn.Linear(50, 50) if is_torch_available() else None
     optimizer = AdamW(m.parameters(), lr=10.) if is_torch_available() else None
...
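The module-level pytestmark skip is replaced by a @require_torch class decorator, which attaches the skip to each TestCase instead of the whole module and drops the pytest dependency. The decorator lives in .utils, outside this diff; a minimal sketch of how such a guard is typically written (assumed, not verbatim):

import unittest

from transformers import is_torch_available

def require_torch(test_case):
    # Skip the decorated test or TestCase when PyTorch is not installed.
    if not is_torch_available():
        return unittest.skip("test requires PyTorch")(test_case)
    return test_case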
transformers/tests/optimization_tf_test.py
0 → 100644
View file @ d5712f7c
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest

from transformers import is_tf_available

from .utils import require_tf

if is_tf_available():
    import tensorflow as tf
    from tensorflow.python.eager import context
    from tensorflow.python.framework import ops
    from transformers import (create_optimizer, GradientAccumulator)


@require_tf
class OptimizationFTest(unittest.TestCase):

    def assertListAlmostEqual(self, list1, list2, tol):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def testGradientAccumulator(self):
        accumulator = GradientAccumulator()
        accumulator([tf.constant([1.0, 2.0])])
        accumulator([tf.constant([-2.0, 1.0])])
        accumulator([tf.constant([-1.0, 2.0])])
        with self.assertRaises(ValueError):
            accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])])
        self.assertEqual(accumulator.step, 3)
        self.assertEqual(len(accumulator.gradients), 1)
        self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [-2.0, 5.0], tol=1e-2)
        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [0.0, 0.0], tol=1e-2)

    def testGradientAccumulatorDistributionStrategy(self):
        context._context = None
        ops.enable_eager_execution_internal()
        physical_devices = tf.config.experimental.list_physical_devices("CPU")
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0],
            [tf.config.experimental.VirtualDeviceConfiguration(),
             tf.config.experimental.VirtualDeviceConfiguration()])

        devices = tf.config.experimental.list_logical_devices(device_type="CPU")
        strategy = tf.distribute.MirroredStrategy(devices=[device.name for device in devices])

        with strategy.scope():
            accumulator = GradientAccumulator()
            variable = tf.Variable([4.0, 3.0])
            optimizer = create_optimizer(5e-5, 10, 5)
            gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)

        def accumulate_on_replica(gradient):
            accumulator([gradient])

        def apply_on_replica():
            optimizer.apply_gradients(list(zip(accumulator.gradients, [variable])), 1.0)

        @tf.function
        def accumulate(grad1, grad2):
            with strategy.scope():
                gradient_placeholder.values[0].assign(grad1)
                gradient_placeholder.values[1].assign(grad2)
                strategy.experimental_run_v2(accumulate_on_replica, args=(gradient_placeholder,))

        @tf.function
        def apply_grad():
            with strategy.scope():
                strategy.experimental_run_v2(apply_on_replica)

        accumulate([1.0, 2.0], [-1.0, 1.0])
        accumulate([3.0, -1.0], [-1.0, -1.0])
        accumulate([-2.0, 2.0], [3.0, -2.0])
        self.assertEqual(accumulator.step, 3)
        self.assertListAlmostEqual(accumulator._gradients[0].values[0].value().numpy().tolist(), [2.0, 3.0], tol=1e-2)
        self.assertListAlmostEqual(accumulator._gradients[0].values[1].value().numpy().tolist(), [1.0, -2.0], tol=1e-2)
        apply_grad()
        self.assertListAlmostEqual(variable.value().numpy().tolist(), [4.0, 3.0], tol=1e-2)
        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        self.assertListAlmostEqual(accumulator._gradients[0].values[0].value().numpy().tolist(), [0.0, 0.0], tol=1e-2)
        self.assertListAlmostEqual(accumulator._gradients[0].values[1].value().numpy().tolist(), [0.0, 0.0], tol=1e-2)


if __name__ == "__main__":
    unittest.main()
\ No newline at end of file
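Stripped of the distribution-strategy plumbing, the contract these tests pin down is simple: call the accumulator once per micro-batch with a list of gradients, read the summed gradients property, then reset(). A minimal gradient-accumulation training-step sketch under those assumptions (model, loss_fn and the data are placeholders, not part of this commit):

import tensorflow as tf
from transformers import create_optimizer, GradientAccumulator

accumulator = GradientAccumulator()
optimizer = create_optimizer(5e-5, 10, 5)  # (init_lr, train_steps, warmup_steps), as in the test above
accumulation_steps = 4

def train_step(model, features, labels, loss_fn):
    with tf.GradientTape() as tape:
        loss = loss_fn(labels, model(features))
    # Add this micro-batch's gradients to the running sum.
    accumulator(tape.gradient(loss, model.trainable_variables))
    if accumulator.step == accumulation_steps:
        # Apply the accumulated gradients once, then start a new window.
        optimizer.apply_gradients(zip(accumulator.gradients, model.trainable_variables))
        accumulator.reset()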
transformers/tests/tokenization_auto_test.py
View file @ d5712f7c
...
@@ -18,15 +18,16 @@ from __future__ import print_function
 import unittest
 import shutil
-import pytest
 import logging

 from transformers import AutoTokenizer, BertTokenizer, AutoTokenizer, GPT2Tokenizer
 from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
+from .utils import slow, SMALL_MODEL_IDENTIFIER


 class AutoTokenizerTest(unittest.TestCase):
-    @pytest.mark.slow
+    @slow
     def test_tokenizer_from_pretrained(self):
         logging.basicConfig(level=logging.INFO)
         for model_name in list(BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.keys())[:1]:
...
@@ -41,6 +42,11 @@ class AutoTokenizerTest(unittest.TestCase):
         self.assertIsInstance(tokenizer, GPT2Tokenizer)
         self.assertGreater(len(tokenizer), 0)

+    def test_tokenizer_from_pretrained_identifier(self):
+        logging.basicConfig(level=logging.INFO)
+        tokenizer = AutoTokenizer.from_pretrained(SMALL_MODEL_IDENTIFIER)
+        self.assertIsInstance(tokenizer, BertTokenizer)
+        self.assertEqual(len(tokenizer), 12)

 if __name__ == "__main__":
     unittest.main()
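As in the other test files in this commit, @pytest.mark.slow becomes a plain @slow decorator from .utils, and SMALL_MODEL_IDENTIFIER gives the new identifier test a tiny fixture model. The commit does not show utils.py itself; a plausible sketch of the slow gate, labelled as an assumption:

import os
import unittest

def slow(test_case):
    # Skip unless slow tests are explicitly enabled via the environment.
    if os.environ.get("RUN_SLOW", "0").lower() not in ("1", "true", "yes"):
        return unittest.skip("test is slow; set RUN_SLOW=1 to run it")(test_case)
    return test_case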
transformers/tests/tokenization_bert_japanese_test.py
0 → 100644
View file @ d5712f7c
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import unittest
from io import open

from transformers.tokenization_bert import WordpieceTokenizer
from transformers.tokenization_bert_japanese import (BertJapaneseTokenizer,
                                                     MecabTokenizer, CharacterTokenizer,
                                                     VOCAB_FILES_NAMES)

from .tokenization_tests_commons import CommonTestCases
from .utils import slow, custom_tokenizers
@custom_tokenizers
class BertJapaneseTokenizationTest(CommonTestCases.CommonTokenizerTester):

    tokenizer_class = BertJapaneseTokenizer

    def setUp(self):
        super(BertJapaneseTokenizationTest, self).setUp()

        vocab_tokens = [u"[UNK]", u"[CLS]", u"[SEP]",
                        u"こんにちは", u"こん", u"にちは", u"ばんは",
                        u"##こん", u"##にちは", u"##ばんは",
                        u"世界", u"##世界", u"、", u"##、", u"。", u"##。"]

        self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
        with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))

    def get_tokenizer(self, **kwargs):
        return BertJapaneseTokenizer.from_pretrained(self.tmpdirname, **kwargs)

    def get_input_output_texts(self):
        input_text = u"こんにちは、世界。\nこんばんは、世界。"
        output_text = u"こんにちは 、 世界 。 こんばんは 、 世界 。"
        return input_text, output_text

    def test_full_tokenizer(self):
        tokenizer = self.tokenizer_class(self.vocab_file)

        tokens = tokenizer.tokenize(u"こんにちは、世界。\nこんばんは、世界。")
        self.assertListEqual(tokens,
                             [u"こんにちは", u"、", u"世界", u"。",
                              u"こん", u"##ばんは", u"、", u"世界", u"。"])
        self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens),
                             [3, 12, 10, 14, 4, 9, 12, 10, 14])

    def test_mecab_tokenizer(self):
        tokenizer = MecabTokenizer()

        self.assertListEqual(
            tokenizer.tokenize(u" \tアップルストアでiPhone8 が \n 発売された 。 "),
            [u"アップルストア", u"で", u"iPhone", u"8", u"が",
             u"発売", u"さ", u"れ", u"た", u"。"])

    def test_mecab_tokenizer_lower(self):
        tokenizer = MecabTokenizer(do_lower_case=True)

        self.assertListEqual(
            tokenizer.tokenize(u" \tアップルストアでiPhone8 が \n 発売された 。 "),
            [u"アップルストア", u"で", u"iphone", u"8", u"が",
             u"発売", u"さ", u"れ", u"た", u"。"])

    def test_mecab_tokenizer_no_normalize(self):
        tokenizer = MecabTokenizer(normalize_text=False)

        self.assertListEqual(
            tokenizer.tokenize(u" \tアップルストアでiPhone8 が \n 発売された 。 "),
            [u"アップルストア", u"で", u"iPhone", u"8", u"が",
             u"発売", u"さ", u"れ", u"た", u" ", u"。"])

    def test_wordpiece_tokenizer(self):
        vocab_tokens = [u"[UNK]", u"[CLS]", u"[SEP]",
                        u"こんにちは", u"こん", u"にちは", u"ばんは",
                        u"##こん", u"##にちは", u"##ばんは"]

        vocab = {}
        for (i, token) in enumerate(vocab_tokens):
            vocab[token] = i
        tokenizer = WordpieceTokenizer(vocab=vocab, unk_token=u"[UNK]")

        self.assertListEqual(tokenizer.tokenize(u""), [])

        self.assertListEqual(tokenizer.tokenize(u"こんにちは"),
                             [u"こんにちは"])

        self.assertListEqual(tokenizer.tokenize(u"こんばんは"),
                             [u"こん", u"##ばんは"])

        self.assertListEqual(tokenizer.tokenize(u"こんばんは こんばんにちは こんにちは"),
                             [u"こん", u"##ばんは", u"[UNK]", u"こんにちは"])

    @slow
    def test_sequence_builders(self):
        tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese")

        text = tokenizer.encode(u"ありがとう。", add_special_tokens=False)
        text_2 = tokenizer.encode(u"どういたしまして。", add_special_tokens=False)

        encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
        encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)

        # 2 is for "[CLS]", 3 is for "[SEP]"
        assert encoded_sentence == [2] + text + [3]
        assert encoded_pair == [2] + text + [3] + text_2 + [3]
class BertJapaneseCharacterTokenizationTest(CommonTestCases.CommonTokenizerTester):

    tokenizer_class = BertJapaneseTokenizer

    def setUp(self):
        super(BertJapaneseCharacterTokenizationTest, self).setUp()

        vocab_tokens = [u"[UNK]", u"[CLS]", u"[SEP]",
                        u"こ", u"ん", u"に", u"ち", u"は", u"ば",
                        u"世", u"界", u"、", u"。"]

        self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
        with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))

    def get_tokenizer(self, **kwargs):
        return BertJapaneseTokenizer.from_pretrained(self.tmpdirname,
                                                     subword_tokenizer_type="character",
                                                     **kwargs)

    def get_input_output_texts(self):
        input_text = u"こんにちは、世界。\nこんばんは、世界。"
        output_text = u"こ ん に ち は 、 世 界 。 こ ん ば ん は 、 世 界 。"
        return input_text, output_text

    def test_full_tokenizer(self):
        tokenizer = self.tokenizer_class(self.vocab_file,
                                         subword_tokenizer_type="character")

        tokens = tokenizer.tokenize(u"こんにちは、世界。\nこんばんは、世界。")
        self.assertListEqual(tokens,
                             [u"こ", u"ん", u"に", u"ち", u"は", u"、", u"世", u"界", u"。",
                              u"こ", u"ん", u"ば", u"ん", u"は", u"、", u"世", u"界", u"。"])
        self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens),
                             [3, 4, 5, 6, 7, 11, 9, 10, 12, 3, 4, 8, 4, 7, 11, 9, 10, 12])

    def test_character_tokenizer(self):
        vocab_tokens = [u"[UNK]", u"[CLS]", u"[SEP]",
                        u"こ", u"ん", u"に", u"ち", u"は", u"ば",
                        u"世", u"界", u"、", u"。"]

        vocab = {}
        for (i, token) in enumerate(vocab_tokens):
            vocab[token] = i
        tokenizer = CharacterTokenizer(vocab=vocab, unk_token=u"[UNK]")

        self.assertListEqual(tokenizer.tokenize(u""), [])

        self.assertListEqual(tokenizer.tokenize(u"こんにちは"),
                             [u"こ", u"ん", u"に", u"ち", u"は"])

        self.assertListEqual(tokenizer.tokenize(u"こんにちほ"),
                             [u"こ", u"ん", u"に", u"ち", u"[UNK]"])

    @slow
    def test_sequence_builders(self):
        tokenizer = self.tokenizer_class.from_pretrained("bert-base-japanese-char")

        text = tokenizer.encode(u"ありがとう。", add_special_tokens=False)
        text_2 = tokenizer.encode(u"どういたしまして。", add_special_tokens=False)

        encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
        encoded_pair = tokenizer.build_inputs_with_special_tokens(text, text_2)

        # 2 is for "[CLS]", 3 is for "[SEP]"
        assert encoded_sentence == [2] + text + [3]
        assert encoded_pair == [2] + text + [3] + text_2 + [3]
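Together the two test classes document the tokenizer's two-stage design: a word tokenizer (MeCab, or per-character splitting when subword_tokenizer_type="character") followed by WordPiece over each word. A short usage sketch against the pretrained checkpoints the slow tests reference (requires the MeCab dependency to be installed; the printed tokens are indicative only):

from transformers.tokenization_bert_japanese import BertJapaneseTokenizer

# Word-level checkpoint: MeCab segmentation, then WordPiece subwords.
tokenizer = BertJapaneseTokenizer.from_pretrained("bert-base-japanese")
print(tokenizer.tokenize(u"こんばんは、世界。"))

# Character-level checkpoint: every character becomes its own token.
char_tokenizer = BertJapaneseTokenizer.from_pretrained("bert-base-japanese-char")
print(char_tokenizer.tokenize(u"こんばんは、世界。"))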