Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
518307df
Commit
518307df
authored
Sep 05, 2019
by
thomwolf
Browse files
test suite independent of framework
parent
9d0a11a6
Changes
20
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
593 additions
and
259 deletions
+593
-259
.circleci/config.yml
.circleci/config.yml
+4
-5
pytorch_transformers/__init__.py
pytorch_transformers/__init__.py
+14
-7
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py
+23
-14
pytorch_transformers/modeling_tf_bert.py
pytorch_transformers/modeling_tf_bert.py
+321
-37
pytorch_transformers/tests/modeling_auto_test.py
pytorch_transformers/tests/modeling_auto_test.py
+11
-8
pytorch_transformers/tests/modeling_bert_test.py
pytorch_transformers/tests/modeling_bert_test.py
+11
-6
pytorch_transformers/tests/modeling_common_test.py
pytorch_transformers/tests/modeling_common_test.py
+8
-4
pytorch_transformers/tests/modeling_distilbert_test.py
pytorch_transformers/tests/modeling_distilbert_test.py
+9
-3
pytorch_transformers/tests/modeling_gpt2_test.py
pytorch_transformers/tests/modeling_gpt2_test.py
+7
-3
pytorch_transformers/tests/modeling_openai_test.py
pytorch_transformers/tests/modeling_openai_test.py
+7
-3
pytorch_transformers/tests/modeling_roberta_test.py
pytorch_transformers/tests/modeling_roberta_test.py
+9
-4
pytorch_transformers/tests/modeling_tf_bert_test.py
pytorch_transformers/tests/modeling_tf_bert_test.py
+108
-127
pytorch_transformers/tests/modeling_tf_common_test.py
pytorch_transformers/tests/modeling_tf_common_test.py
+1
-8
pytorch_transformers/tests/modeling_transfo_xl_test.py
pytorch_transformers/tests/modeling_transfo_xl_test.py
+8
-4
pytorch_transformers/tests/modeling_xlm_test.py
pytorch_transformers/tests/modeling_xlm_test.py
+11
-5
pytorch_transformers/tests/modeling_xlnet_test.py
pytorch_transformers/tests/modeling_xlnet_test.py
+9
-4
pytorch_transformers/tests/optimization_test.py
pytorch_transformers/tests/optimization_test.py
+11
-5
pytorch_transformers/tests/tokenization_auto_test.py
pytorch_transformers/tests/tokenization_auto_test.py
+3
-4
pytorch_transformers/tests/tokenization_transfo_xl_test.py
pytorch_transformers/tests/tokenization_transfo_xl_test.py
+9
-3
pytorch_transformers/tokenization_transfo_xl.py
pytorch_transformers/tokenization_transfo_xl.py
+9
-5
No files found.
.circleci/config.yml
View file @
518307df
...
...
@@ -10,7 +10,7 @@ jobs:
-
checkout
-
run
:
sudo pip install torch
-
run
:
sudo pip install --progress-bar off .
-
run
:
sudo pip install pytest codecov pytest-cov
-
run
:
sudo pip install pytest
==5.0.1
codecov pytest-cov
-
run
:
sudo pip install tensorboardX scikit-learn
-
run
:
python -m pytest -sv ./pytorch_transformers/tests/ --cov
-
run
:
python -m pytest -sv ./examples/
...
...
@@ -25,10 +25,9 @@ jobs:
-
checkout
-
run
:
sudo pip install tensorflow==2.0.0-rc0
-
run
:
sudo pip install --progress-bar off .
-
run
:
sudo pip install pytest codecov pytest-cov
-
run
:
sudo pip install pytest
==5.0.1
codecov pytest-cov
-
run
:
sudo pip install tensorboardX scikit-learn
-
run
:
python -m pytest -sv ./pytorch_transformers/tests/ --cov
-
run
:
python -m pytest -sv ./examples/
-
run
:
codecov
build_py2_torch
:
working_directory
:
~/pytorch-transformers
...
...
@@ -40,7 +39,7 @@ jobs:
-
checkout
-
run
:
sudo pip install torch
-
run
:
sudo pip install --progress-bar off .
-
run
:
sudo pip install pytest codecov pytest-cov
-
run
:
sudo pip install pytest
==5.0.1
codecov pytest-cov
-
run
:
python -m pytest -sv ./pytorch_transformers/tests/ --cov
-
run
:
codecov
build_py2_tf
:
...
...
@@ -53,7 +52,7 @@ jobs:
-
checkout
-
run
:
sudo pip install tensorflow==2.0.0-rc0
-
run
:
sudo pip install --progress-bar off .
-
run
:
sudo pip install pytest codecov pytest-cov
-
run
:
sudo pip install pytest
==5.0.1
codecov pytest-cov
-
run
:
python -m pytest -sv ./pytorch_transformers/tests/ --cov
-
run
:
codecov
deploy_doc
:
...
...
pytorch_transformers/__init__.py
View file @
518307df
...
...
@@ -43,11 +43,11 @@ from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CO
# Modeling
try
:
import
torch
torch_available
=
True
# pylint: disable=invalid-name
_
torch_available
=
True
# pylint: disable=invalid-name
except
ImportError
:
torch_available
=
False
# pylint: disable=invalid-name
_
torch_available
=
False
# pylint: disable=invalid-name
if
torch_available
:
if
_
torch_available
:
logger
.
info
(
"PyTorch version {} available."
.
format
(
torch
.
__version__
))
from
.modeling_utils
import
(
PreTrainedModel
,
prune_layer
,
Conv1D
)
...
...
@@ -87,19 +87,26 @@ if torch_available:
# TensorFlow
try
:
import
tensorflow
as
tf
tf_available
=
True
# pylint: disable=invalid-name
assert
int
(
tf
.
__version__
[
0
])
>=
2
_tf_available
=
True
# pylint: disable=invalid-name
except
ImportError
:
tf_available
=
False
# pylint: disable=invalid-name
_
tf_available
=
False
# pylint: disable=invalid-name
if
tf_available
:
if
_
tf_available
:
logger
.
info
(
"TensorFlow version {} available."
.
format
(
tf
.
__version__
))
from
.modeling_tf_utils
import
TFPreTrainedModel
from
.modeling_tf_bert
import
(
TFBertPreTrainedModel
,
TFBertModel
,
TFBertForPreTraining
,
TFBertForMaskedLM
,
TFBertForNextSentencePrediction
,
load_pt_weights_in_
ber
t
)
TFBertForMaskedLM
,
TFBertForNextSentencePrediction
,
load_
bert_
pt_weights_in_t
f
)
# Files and general utilities
from
.file_utils
import
(
PYTORCH_TRANSFORMERS_CACHE
,
PYTORCH_PRETRAINED_BERT_CACHE
,
cached_path
,
add_start_docstrings
,
add_end_docstrings
,
WEIGHTS_NAME
,
TF_WEIGHTS_NAME
,
CONFIG_NAME
)
def is_torch_available():
    """Return the module-level ``_torch_available`` flag.

    The flag is assigned when this package attempts ``import torch`` at import time.
    """
    return _torch_available
def is_tf_available():
    """Return the module-level ``_tf_available`` flag.

    The flag is assigned when this package attempts ``import tensorflow`` at import time.
    """
    return _tf_available
pytorch_transformers/convert_
bert_
pytorch_checkpoint_to_tf.py
→
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py
View file @
518307df
...
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert
BERT
checkpoint
.
"""
"""
Convert
pytorch
checkpoint
s to TensorFlow
"""
from
__future__
import
absolute_import
from
__future__
import
division
...
...
@@ -21,19 +21,22 @@ from __future__ import print_function
import
argparse
import
tensorflow
as
tf
from
pytorch_transformers
import
BertConfig
,
TFBertForPreTraining
,
load_pt_weights_in_
ber
t
from
pytorch_transformers
import
BertConfig
,
TFBertForPreTraining
,
load_
bert_
pt_weights_in_t
f
import
logging
logging
.
basicConfig
(
level
=
logging
.
INFO
)
def
convert_bert_checkpoint_to_tf
(
pytorch_checkpoint_path
,
bert_config_file
,
tf_dump_path
):
def
convert_pt_checkpoint_to_tf
(
model_type
,
pytorch_checkpoint_path
,
config_file
,
tf_dump_path
):
if
model_type
==
'bert'
:
# Initialise TF model
config
=
BertConfig
.
from_json_file
(
bert_
config_file
)
config
=
BertConfig
.
from_json_file
(
config_file
)
print
(
"Building TensorFlow model from configuration: {}"
.
format
(
str
(
config
)))
model
=
TFBertForPreTraining
(
config
)
# Load weights from tf checkpoint
model
=
load_pt_weights_in_bert
(
model
,
config
,
pytorch_checkpoint_path
)
model
=
load_bert_pt_weights_in_tf
(
model
,
config
,
pytorch_checkpoint_path
)
else
:
raise
ValueError
(
"Unrecognized model type, should be one of ['bert']."
)
# Save pytorch-model
print
(
"Save TensorFlow model to {}"
.
format
(
tf_dump_path
))
...
...
@@ -43,16 +46,21 @@ def convert_bert_checkpoint_to_tf(pytorch_checkpoint_path, bert_config_file, tf_
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
## Required parameters
parser
.
add_argument
(
"--model_type"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"Model type selcted in the list of."
)
parser
.
add_argument
(
"--pytorch_checkpoint_path"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"Path to the PyTorch checkpoint path."
)
parser
.
add_argument
(
"--
bert_
config_file"
,
parser
.
add_argument
(
"--config_file"
,
default
=
None
,
type
=
str
,
required
=
True
,
help
=
"The config json file corresponding to the pre-trained
BERT
model.
\n
"
help
=
"The config json file corresponding to the pre-trained model.
\n
"
"This specifies the model architecture."
)
parser
.
add_argument
(
"--tf_dump_path"
,
default
=
None
,
...
...
@@ -60,6 +68,7 @@ if __name__ == "__main__":
required
=
True
,
help
=
"Path to the output Tensorflow dump file."
)
args
=
parser
.
parse_args
()
convert_bert_checkpoint_to_tf
(
args
.
pytorch_checkpoint_path
,
args
.
bert_config_file
,
convert_pt_checkpoint_to_tf
(
args
.
model_type
.
lower
(),
args
.
pytorch_checkpoint_path
,
args
.
config_file
,
args
.
tf_dump_path
)
pytorch_transformers/modeling_tf_bert.py
View file @
518307df
...
...
@@ -51,7 +51,7 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
}
def
load_pt_weights_in_
ber
t
(
tf_model
,
config
,
pytorch_checkpoint_path
):
def
load_
bert_
pt_weights_in_t
f
(
tf_model
,
config
,
pytorch_checkpoint_path
):
""" Load pytorch checkpoints in a TF 2.0 model and save it using HDF5 format
We use HDF5 to easily do transfer learning
(see https://github.com/tensorflow/tensorflow/blob/ee16fcac960ae660e0e4496658a366e2f745e1f0/tensorflow/python/keras/engine/network.py#L1352-L1357).
...
...
@@ -150,6 +150,7 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
input_ids
,
position_ids
,
token_type_ids
=
inputs
...
...
@@ -194,6 +195,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
x
=
tf
.
reshape
(
x
,
(
batch_size
,
-
1
,
self
.
num_attention_heads
,
self
.
attention_head_size
))
return
tf
.
transpose
(
x
,
perm
=
[
0
,
2
,
1
,
3
])
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -242,6 +244,7 @@ class TFBertSelfOutput(tf.keras.layers.Layer):
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
input_tensor
=
inputs
...
...
@@ -261,6 +264,7 @@ class TFBertAttention(tf.keras.layers.Layer):
def
prune_heads
(
self
,
heads
):
raise
NotImplementedError
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
input_tensor
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -279,6 +283,7 @@ class TFBertIntermediate(tf.keras.layers.Layer):
else
:
self
.
intermediate_act_fn
=
config
.
hidden_act
@
tf
.
function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
intermediate_act_fn
(
hidden_states
)
...
...
@@ -292,6 +297,7 @@ class TFBertOutput(tf.keras.layers.Layer):
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
input_tensor
=
inputs
...
...
@@ -309,6 +315,7 @@ class TFBertLayer(tf.keras.layers.Layer):
self
.
intermediate
=
TFBertIntermediate
(
config
,
name
=
'intermediate'
)
self
.
bert_output
=
TFBertOutput
(
config
,
name
=
'output'
)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -327,6 +334,7 @@ class TFBertEncoder(tf.keras.layers.Layer):
self
.
output_hidden_states
=
config
.
output_hidden_states
self
.
layer
=
[
TFBertLayer
(
config
,
name
=
'layer_{}'
.
format
(
i
))
for
i
in
range
(
config
.
num_hidden_layers
)]
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -359,6 +367,7 @@ class TFBertPooler(tf.keras.layers.Layer):
super
(
TFBertPooler
,
self
).
__init__
(
**
kwargs
)
self
.
dense
=
tf
.
keras
.
layers
.
Dense
(
config
.
hidden_size
,
activation
=
'tanh'
,
name
=
'dense'
)
@
tf
.
function
def
call
(
self
,
hidden_states
):
# We "pool" the model by simply taking the hidden state corresponding
# to the first token.
...
...
@@ -377,6 +386,7 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
self
.
transform_act_fn
=
config
.
hidden_act
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
@
tf
.
function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
transform_act_fn
(
hidden_states
)
...
...
@@ -400,6 +410,7 @@ class TFBertLMPredictionHead(tf.keras.layers.Layer):
trainable
=
True
,
name
=
'bias'
)
@
tf
.
function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
transform
(
hidden_states
)
hidden_states
=
self
.
decoder
(
hidden_states
)
+
self
.
bias
...
...
@@ -411,6 +422,7 @@ class TFBertMLMHead(tf.keras.layers.Layer):
super
(
TFBertMLMHead
,
self
).
__init__
(
**
kwargs
)
self
.
predictions
=
TFBertLMPredictionHead
(
config
,
name
=
'predictions'
)
@tf.function
def call(self, sequence_output):
    """Apply the ``predictions`` sub-layer to ``sequence_output`` and return its result."""
    return self.predictions(sequence_output)
...
...
@@ -421,6 +433,7 @@ class TFBertNSPHead(tf.keras.layers.Layer):
super
(
TFBertNSPHead
,
self
).
__init__
(
**
kwargs
)
self
.
seq_relationship
=
tf
.
keras
.
layers
.
Dense
(
2
,
name
=
'seq_relationship'
)
@tf.function
def call(self, pooled_output):
    """Apply the ``seq_relationship`` dense layer to ``pooled_output`` and return its result."""
    return self.seq_relationship(pooled_output)
...
...
@@ -447,6 +460,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
"""
raise
NotImplementedError
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
if
not
isinstance
(
inputs
,
(
dict
,
tuple
,
list
)):
input_ids
=
inputs
...
...
@@ -459,12 +473,12 @@ class TFBertMainLayer(tf.keras.layers.Layer):
head_mask
=
inputs
[
4
]
if
len
(
inputs
)
>
4
else
None
assert
len
(
inputs
)
<=
5
,
"Too many inputs."
else
:
input_ids
=
inputs
.
pop
(
'input_ids'
)
attention_mask
=
inputs
.
pop
(
'attention_mask'
,
None
)
token_type_ids
=
inputs
.
pop
(
'token_type_ids'
,
None
)
position_ids
=
inputs
.
pop
(
'position_ids'
,
None
)
head_mask
=
inputs
.
pop
(
'head_mask'
,
None
)
assert
len
(
inputs
)
=
=
0
,
"
Unexpected inputs detected: {}. Check inputs dict key names."
.
format
(
list
(
inputs
.
keys
()))
input_ids
=
inputs
.
get
(
'input_ids'
)
attention_mask
=
inputs
.
get
(
'attention_mask'
,
None
)
token_type_ids
=
inputs
.
get
(
'token_type_ids'
,
None
)
position_ids
=
inputs
.
get
(
'position_ids'
,
None
)
head_mask
=
inputs
.
get
(
'head_mask'
,
None
)
assert
len
(
inputs
)
<
=
5
,
"
Too many inputs."
if
attention_mask
is
None
:
attention_mask
=
tf
.
fill
(
tf
.
shape
(
input_ids
),
1
)
...
...
@@ -507,23 +521,16 @@ class TFBertMainLayer(tf.keras.layers.Layer):
outputs
=
(
sequence_output
,
pooled_output
,)
+
encoder_outputs
[
1
:]
# add hidden_states and attentions if they are here
return
outputs
# sequence_output, pooled_output, (hidden_states), (attentions)
class
TFBertPreTrainedModel
(
TFPreTrainedModel
):
""" An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class
=
BertConfig
pretrained_model_archive_map
=
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
load_pt_weights
=
load_pt_weights_in_
ber
t
load_pt_weights
=
load_
bert_
pt_weights_in_t
f
base_model_prefix
=
"bert"
def
__init__
(
self
,
*
inputs
,
**
kwargs
):
super
(
TFBertPreTrainedModel
,
self
).
__init__
(
*
inputs
,
**
kwargs
)
def
init_weights
(
self
,
module
):
""" Initialize the weights.
"""
raise
NotImplementedError
BERT_START_DOCSTRING
=
r
""" The BERT model was proposed in
`BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_
...
...
@@ -635,6 +642,7 @@ class TFBertModel(TFBertPreTrainedModel):
super
(
TFBertModel
,
self
).
__init__
(
config
)
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
@tf.function
def call(self, inputs, training=False):
    """Forward ``inputs`` and the ``training`` flag to the ``bert`` main layer and return its outputs unchanged."""
    return self.bert(inputs, training=training)
...
...
@@ -687,7 +695,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
self
.
cls_mlm
=
TFBertMLMHead
(
config
,
name
=
'cls_mlm'
)
self
.
cls_nsp
=
TFBertNSPHead
(
config
,
name
=
'cls_nsp'
)
# self.apply(self.init_weights) # TODO check added weights initialization
self
.
tie_weights
()
def
tie_weights
(
self
):
...
...
@@ -695,6 +702,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
"""
pass
# TODO add weights tying
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -704,14 +712,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
outputs
=
(
prediction_scores
,
seq_relationship_score
,)
+
outputs
[
2
:]
# add hidden states and attention if they are here
# if masked_lm_labels is not None and next_sentence_label is not None:
# loss_fct = CrossEntropyLoss(ignore_index=-1)
# masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
# next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
# total_loss = masked_lm_loss + next_sentence_loss
# outputs = (total_loss,) + outputs
# TODO add example with losses using model.compile and a dictionary of losses (give names to the output layers)
return
outputs
# prediction_scores, seq_relationship_score, (hidden_states), (attentions)
...
...
@@ -753,7 +753,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
self
.
cls_mlm
=
TFBertMLMHead
(
config
,
name
=
'cls_mlm'
)
# self.apply(self.init_weights)
self
.
tie_weights
()
def
tie_weights
(
self
):
...
...
@@ -761,6 +760,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
"""
pass
# TODO add weights tying
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -768,11 +768,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
prediction_scores
=
self
.
cls_mlm
(
sequence_output
)
outputs
=
(
prediction_scores
,)
+
outputs
[
2
:]
# Add hidden states and attention if they are here
# if masked_lm_labels is not None:
# loss_fct = CrossEntropyLoss(ignore_index=-1)
# masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
# outputs = (masked_lm_loss,) + outputs
# TODO example with losses
return
outputs
# prediction_scores, (hidden_states), (attentions)
...
...
@@ -815,8 +810,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
self
.
cls_nsp
=
TFBertNSPHead
(
config
,
name
=
'cls_nsp'
)
# self.apply(self.init_weights)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -824,9 +818,299 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
seq_relationship_score
=
self
.
cls_nsp
(
pooled_output
)
outputs
=
(
seq_relationship_score
,)
+
outputs
[
2
:]
# add hidden states and attention if they are here
# if next_sentence_label is not None:
# loss_fct = CrossEntropyLoss(ignore_index=-1)
# next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
# outputs = (next_sentence_loss,) + outputs
return
outputs
# seq_relationship_score, (hidden_states), (attentions)
@add_start_docstrings("""Bert Model transformer with a sequence classification/regression head on top (a linear layer on top of
    the pooled output) e.g. for GLUE tasks. """,
    BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
class TFBertForSequenceClassification(TFBertPreTrainedModel):
    r"""
    This TensorFlow model does not take ``labels`` and does not compute a loss:
    ``call`` only receives ``inputs`` and ``training`` and returns the logits
    followed by the optional extra outputs of the underlying BERT layer.

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **logits**: ``tf.Tensor`` of shape ``(batch_size, config.num_labels)``
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``tf.Tensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``tf.Tensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
        input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        outputs = model(input_ids)
        logits = outputs[0]

    """
    def __init__(self, config):
        super(TFBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')

    @tf.function
    def call(self, inputs, training=False):
        """Classify each sequence from the pooled BERT output.

        Args:
            inputs: forwarded untouched to the ``bert`` main layer.
            training: forwarded to the ``bert`` main layer and to the dropout layer.

        Returns:
            Tuple ``(logits,) + outputs[2:]`` where ``outputs`` is what the ``bert`` layer returned.
        """
        outputs = self.bert(inputs, training=training)

        pooled_output = outputs[1]

        # Pass the flag explicitly so dropout follows this call's `training` argument
        # instead of relying on implicit propagation by Keras.
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)

        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here

        return outputs  # logits, (hidden_states), (attentions)
@add_start_docstrings("""Bert Model with a multiple choice classification head on top (a linear layer on top of
    the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """,
    BERT_START_DOCSTRING)
class TFBertForMultipleChoice(TFBertPreTrainedModel):
    r"""
    ``call`` accepts either a single ``input_ids`` tensor, a list/tuple ordered as
    ``(input_ids, attention_mask, token_type_ids, position_ids, head_mask)``, or a dict using
    those names as keys. This TensorFlow model does not take ``labels`` and does not compute a loss.

    Inputs:
        **input_ids**: ``tf.Tensor`` of shape ``(batch_size, num_choices, sequence_length)``:
            Indices of input sequence tokens in the vocabulary.
            The second dimension of the input (`num_choices`) indicates the number of choices to score.
            To match pre-training, BERT input sequence should be formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs:

                ``tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]``

                ``token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1``

            (b) For single sequences:

                ``tokens:         [CLS] the dog is hairy . [SEP]``

                ``token_type_ids:   0   0   0   0  0     0   0``

            Indices can be obtained using :class:`pytorch_transformers.BertTokenizer`.
            See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and
            :func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
        **token_type_ids**: (`optional`) ``tf.Tensor`` of shape ``(batch_size, num_choices, sequence_length)``:
            Segment token indices to indicate first and second portions of the inputs.
            The second dimension of the input (`num_choices`) indicates the number of choices to score.
            Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1``
            corresponds to a `sentence B` token
            (see `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding`_ for more details).
        **attention_mask**: (`optional`) ``tf.Tensor`` of shape ``(batch_size, num_choices, sequence_length)``:
            Mask to avoid performing attention on padding token indices.
            The second dimension of the input (`num_choices`) indicates the number of choices to score.
            Mask values selected in ``[0, 1]``:
            ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
        **position_ids**: (`optional`) ``tf.Tensor`` of shape ``(batch_size, num_choices, sequence_length)``:
            Reshaped to ``(-1, sequence_length)`` and forwarded to the BERT layer.
        **head_mask**: (`optional`) ``tf.Tensor`` of shape ``(num_heads,)`` or ``(num_layers, num_heads)``:
            Mask to nullify selected heads of the self-attention modules.
            Mask values selected in ``[0, 1]``:
            ``1`` indicates the head is **not masked**, ``0`` indicates the head is **masked**.

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **classification_scores**: ``tf.Tensor`` of shape ``(batch_size, num_choices)`` where `num_choices` is the size of the second dimension
            of the input tensors. (see `input_ids` above).
            Classification scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``tf.Tensor`` (one for the output of each layer + the output of the embeddings),
            computed on the inputs flattened to ``(batch_size * num_choices, sequence_length)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``tf.Tensor`` (one for each layer), computed on the flattened inputs:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForMultipleChoice.from_pretrained('bert-base-uncased')
        choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
        input_ids = tf.constant([tokenizer.encode(s) for s in choices])[None, :]  # Batch size 1, 2 choices
        outputs = model(input_ids)
        classification_scores = outputs[0]

    """
    def __init__(self, config):
        super(TFBertForMultipleChoice, self).__init__(config)

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        # One score per (example, choice) pair; scores are regrouped per example in `call`.
        self.classifier = tf.keras.layers.Dense(1, name='classifier')

    @tf.function
    def call(self, inputs, training=False):
        """Score every choice of every example.

        Args:
            inputs: a single ``input_ids`` tensor, a list/tuple of up to five entries, or a dict
                (see the class docstring for the order and key names).
            training: forwarded to the ``bert`` main layer and to the dropout layer.

        Returns:
            Tuple ``(reshaped_logits,) + outputs[2:]`` where ``outputs`` is what the ``bert`` layer returned.
        """
        # Unpack whichever container was given into the five named inputs.
        if not isinstance(inputs, (dict, tuple, list)):
            input_ids = inputs
            attention_mask, head_mask, position_ids, token_type_ids = None, None, None, None
        elif isinstance(inputs, (tuple, list)):
            input_ids = inputs[0]
            attention_mask = inputs[1] if len(inputs) > 1 else None
            token_type_ids = inputs[2] if len(inputs) > 2 else None
            position_ids = inputs[3] if len(inputs) > 3 else None
            head_mask = inputs[4] if len(inputs) > 4 else None
            assert len(inputs) <= 5, "Too many inputs."
        else:
            input_ids = inputs.get('input_ids')
            attention_mask = inputs.get('attention_mask', None)
            token_type_ids = inputs.get('token_type_ids', None)
            position_ids = inputs.get('position_ids', None)
            head_mask = inputs.get('head_mask', None)
            assert len(inputs) <= 5, "Too many inputs."

        num_choices = tf.shape(input_ids)[1]
        seq_length = tf.shape(input_ids)[2]

        # Fold the choices dimension into the batch so BERT sees ordinary 2-D inputs.
        flat_input_ids = tf.reshape(input_ids, (-1, seq_length))
        flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
        flat_token_type_ids = tf.reshape(token_type_ids, (-1, seq_length)) if token_type_ids is not None else None
        flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None

        flat_inputs = [flat_input_ids, flat_attention_mask, flat_token_type_ids, flat_position_ids, head_mask]

        outputs = self.bert(flat_inputs, training=training)

        pooled_output = outputs[1]

        # Pass the flag explicitly so dropout follows this call's `training` argument
        # instead of relying on implicit propagation by Keras.
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)
        # Regroup the per-choice scores back into one row per example.
        reshaped_logits = tf.reshape(logits, (-1, num_choices))

        outputs = (reshaped_logits,) + outputs[2:]  # add hidden states and attention if they are here

        return outputs  # reshaped_logits, (hidden_states), (attentions)
@add_start_docstrings("""Bert Model with a token classification head on top (a linear layer on top of
    the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """,
    BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
class TFBertForTokenClassification(TFBertPreTrainedModel):
    r"""
    This TensorFlow model does not take ``labels`` and does not compute a loss:
    ``call`` only receives ``inputs`` and ``training`` and returns the per-token scores
    followed by the optional extra outputs of the underlying BERT layer.

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **scores**: ``tf.Tensor`` of shape ``(batch_size, sequence_length, config.num_labels)``
            Classification scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``tf.Tensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``tf.Tensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertForTokenClassification.from_pretrained('bert-base-uncased')
        input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        outputs = model(input_ids)
        scores = outputs[0]

    """
    def __init__(self, config):
        super(TFBertForTokenClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')

    @tf.function
    def call(self, inputs, training=False):
        """Classify every token from the BERT sequence output.

        Args:
            inputs: forwarded untouched to the ``bert`` main layer.
            training: forwarded to the ``bert`` main layer and to the dropout layer.

        Returns:
            Tuple ``(logits,) + outputs[2:]`` where ``outputs`` is what the ``bert`` layer returned.
        """
        outputs = self.bert(inputs, training=training)

        sequence_output = outputs[0]

        # Pass the flag explicitly so dropout follows this call's `training` argument
        # instead of relying on implicit propagation by Keras.
        sequence_output = self.dropout(sequence_output, training=training)
        logits = self.classifier(sequence_output)

        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here

        return outputs  # scores, (hidden_states), (attentions)
@add_start_docstrings("""Bert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
the hidden-states output to compute `span start logits` and `span end logits`). """,
    BERT_START_DOCSTRING, BERT_INPUTS_DOCSTRING)
class TFBertForQuestionAnswering(TFBertPreTrainedModel):
    r"""
    ``call`` takes no ``start_positions``/``end_positions`` labels and does not
    return a loss: it only produces the span-start and span-end scores.

    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **start_scores**: tensor of shape ``(batch_size, sequence_length,)``
            Span-start scores (before SoftMax).
        **end_scores**: tensor of shape ``(batch_size, sequence_length,)``
            Span-end scores (before SoftMax).
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of tensors (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of tensors (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.

    Examples::

        # `model` is a TFBertForQuestionAnswering instance; `input_ids`, `input_mask` and
        # `token_type_ids` are integer tensors (assumed shape: (batch_size, sequence_length))
        inputs = {'input_ids': input_ids, 'attention_mask': input_mask, 'token_type_ids': token_type_ids}
        outputs = model(inputs)
        start_scores, end_scores = outputs[:2]

    """
    def __init__(self, config):
        """Create the BERT encoder and the dense span-scoring head.

        Args:
            config: model configuration, forwarded to the parent constructor
                and to ``TFBertMainLayer``. ``config.num_labels`` sets the
                width of the ``qa_outputs`` layer.
        """
        super(TFBertForQuestionAnswering, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
        self.qa_outputs = tf.keras.layers.Dense(config.num_labels, name='qa_outputs')

    @tf.function
    def call(self, inputs, training=False):
        """Compute span-start and span-end scores.

        Args:
            inputs: model inputs, forwarded unchanged to ``self.bert``.
            training: flag forwarded to ``self.bert``.

        Returns:
            The tuple ``(start_logits, end_logits) + outputs[2:]``, where
            ``outputs`` is what ``self.bert`` returned.
        """
        outputs = self.bert(inputs, training=training)
        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        # The last axis is cut into two equal halves and each half is then
        # squeezed on that axis, so this only works when the head emits
        # exactly two values per token (config.num_labels == 2).
        start_logits, end_logits = tf.split(logits, 2, axis=-1)
        start_logits = tf.squeeze(start_logits, axis=-1)
        end_logits = tf.squeeze(end_logits, axis=-1)

        outputs = (start_logits, end_logits,) + outputs[2:]

        return outputs  # start_logits, end_logits, (hidden_states), (attentions)
pytorch_transformers/tests/modeling_auto_test.py
View file @
518307df
...
...
@@ -21,15 +21,18 @@ import shutil
import
pytest
import
logging
from
pytorch_transformers
import
(
AutoConfig
,
BertConfig
,
try
:
from
pytorch_transformers
import
(
AutoConfig
,
BertConfig
,
AutoModel
,
BertModel
,
AutoModelWithLMHead
,
BertForMaskedLM
,
AutoModelForSequenceClassification
,
BertForSequenceClassification
,
AutoModelForQuestionAnswering
,
BertForQuestionAnswering
)
from
pytorch_transformers.modeling_bert
import
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers.modeling_bert
import
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
class
AutoModelTest
(
unittest
.
TestCase
):
...
...
pytorch_transformers/tests/modeling_bert_test.py
View file @
518307df
...
...
@@ -20,21 +20,26 @@ import unittest
import
shutil
import
pytest
from
pytorch_transformers
import
(
BertConfig
,
BertModel
,
BertForMaskedLM
,
BertForNextSentencePrediction
,
BertForPreTraining
,
BertForQuestionAnswering
,
BertForSequenceClassification
,
BertForTokenClassification
,
BertForMultipleChoice
)
from
pytorch_transformers.modeling_bert
import
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers
import
is_torch_available
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
try
:
from
pytorch_transformers
import
(
BertConfig
,
BertModel
,
BertForMaskedLM
,
BertForNextSentencePrediction
,
BertForPreTraining
,
BertForQuestionAnswering
,
BertForSequenceClassification
,
BertForTokenClassification
,
BertForMultipleChoice
)
from
pytorch_transformers.modeling_bert
import
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
class
BertModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
BertModel
,
BertForMaskedLM
,
BertForNextSentencePrediction
,
BertForPreTraining
,
BertForQuestionAnswering
,
BertForSequenceClassification
,
BertForTokenClassification
)
BertForTokenClassification
)
if
is_torch_available
()
else
()
class
BertModelTester
(
object
):
...
...
pytorch_transformers/tests/modeling_common_test.py
View file @
518307df
...
...
@@ -25,12 +25,16 @@ import uuid
import
unittest
import
logging
import
pytest
import
torch
try
:
import
torch
from
pytorch_transformers
import
(
PretrainedConfig
,
PreTrainedModel
,
from
pytorch_transformers
import
(
PretrainedConfig
,
PreTrainedModel
,
BertModel
,
BertConfig
,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2Config
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
)
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
def
_config_zero_init
(
config
):
...
...
pytorch_transformers/tests/modeling_distilbert_test.py
View file @
518307df
...
...
@@ -17,9 +17,15 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
pytest
from
pytorch_transformers
import
(
DistilBertConfig
,
DistilBertModel
,
DistilBertForMaskedLM
,
from
pytorch_transformers
import
is_torch_available
try
:
from
pytorch_transformers
import
(
DistilBertConfig
,
DistilBertModel
,
DistilBertForMaskedLM
,
DistilBertForQuestionAnswering
,
DistilBertForSequenceClassification
)
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -28,7 +34,7 @@ from .configuration_common_test import ConfigTester
class
DistilBertModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
DistilBertModel
,
DistilBertForMaskedLM
,
DistilBertForQuestionAnswering
,
DistilBertForSequenceClassification
)
DistilBertForSequenceClassification
)
if
is_torch_available
()
else
None
test_pruning
=
True
test_torchscript
=
True
test_resize_embeddings
=
True
...
...
pytorch_transformers/tests/modeling_gpt2_test.py
View file @
518307df
...
...
@@ -20,9 +20,13 @@ import unittest
import
pytest
import
shutil
from
pytorch_transformers
import
is_torch_available
from
pytorch_transformers
import
(
GPT2Config
,
GPT2Model
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
try
:
from
pytorch_transformers
import
(
GPT2Config
,
GPT2Model
,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
,
GPT2LMHeadModel
,
GPT2DoubleHeadsModel
)
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -30,7 +34,7 @@ from .configuration_common_test import ConfigTester
class
GPT2ModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
GPT2Model
,
GPT2LMHeadModel
,
GPT2DoubleHeadsModel
)
all_model_classes
=
(
GPT2Model
,
GPT2LMHeadModel
,
GPT2DoubleHeadsModel
)
if
is_torch_available
()
else
()
class
GPT2ModelTester
(
object
):
...
...
pytorch_transformers/tests/modeling_openai_test.py
View file @
518307df
...
...
@@ -20,9 +20,13 @@ import unittest
import
pytest
import
shutil
from
pytorch_transformers
import
is_torch_available
from
pytorch_transformers
import
(
OpenAIGPTConfig
,
OpenAIGPTModel
,
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
,
try
:
from
pytorch_transformers
import
(
OpenAIGPTConfig
,
OpenAIGPTModel
,
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
,
OpenAIGPTLMHeadModel
,
OpenAIGPTDoubleHeadsModel
)
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -30,7 +34,7 @@ from .configuration_common_test import ConfigTester
class
OpenAIGPTModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
OpenAIGPTModel
,
OpenAIGPTLMHeadModel
,
OpenAIGPTDoubleHeadsModel
)
all_model_classes
=
(
OpenAIGPTModel
,
OpenAIGPTLMHeadModel
,
OpenAIGPTDoubleHeadsModel
)
if
is_torch_available
()
else
()
class
OpenAIGPTModelTester
(
object
):
...
...
pytorch_transformers/tests/modeling_roberta_test.py
View file @
518307df
...
...
@@ -19,10 +19,15 @@ from __future__ import print_function
import
unittest
import
shutil
import
pytest
import
torch
from
pytorch_transformers
import
(
RobertaConfig
,
RobertaModel
,
RobertaForMaskedLM
,
RobertaForSequenceClassification
)
from
pytorch_transformers.modeling_roberta
import
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers
import
is_torch_available
try
:
import
torch
from
pytorch_transformers
import
(
RobertaConfig
,
RobertaModel
,
RobertaForMaskedLM
,
RobertaForSequenceClassification
)
from
pytorch_transformers.modeling_roberta
import
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -30,7 +35,7 @@ from .configuration_common_test import ConfigTester
class
RobertaModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
RobertaForMaskedLM
,
RobertaModel
)
all_model_classes
=
(
RobertaForMaskedLM
,
RobertaModel
)
if
is_torch_available
()
else
()
class
RobertaModelTester
(
object
):
...
...
pytorch_transformers/tests/modeling_tf_bert_test.py
View file @
518307df
...
...
@@ -24,21 +24,27 @@ import sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
pytorch_transformers
import
BertConfig
,
is_tf_available
try
:
import
tensorflow
as
tf
from
pytorch_transformers
import
(
BertConfig
)
from
pytorch_transformers.modeling_tf_bert
import
TFBertModel
,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers.modeling_tf_bert
import
(
TFBertModel
,
TFBertForMaskedLM
,
TFBertForNextSentencePrediction
,
TFBertForPreTraining
,
TFBertForSequenceClassification
,
TFBertForMultipleChoice
,
TFBertForTokenClassification
,
TFBertForQuestionAnswering
,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
)
except
ImportError
:
p
ass
p
ytestmark
=
pytest
.
mark
.
skip
(
"Require TensorFlow"
)
class
TFBertModelTest
(
TFCommonTestCases
.
TFCommonModelTester
):
all_model_classes
=
(
TFBertModel
,)
# BertForMaskedLM, BertForNextSentencePrediction,
# BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
# BertForTokenClassification)
all_model_classes
=
(
TFBertModel
,
TFBertForMaskedLM
,
TFBertForNextSentencePrediction
,
TFBertForPreTraining
,
TFBertForQuestionAnswering
,
TFBertForSequenceClassification
,
TFBertForTokenClassification
)
if
is_tf_available
()
else
()
class
TFBertModelTester
(
object
):
...
...
@@ -123,14 +129,8 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
return
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
def
check_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_and_check_bert_model
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
model
=
TFBertModel
(
config
=
config
)
# model.eval()
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
...
...
@@ -152,125 +152,115 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
def
create_and_check_bert_for_masked_lm
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# model = BertForMaskedLM(config=config)
# model.eval()
# loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
# result = {
# "loss": loss,
# "prediction_scores": prediction_scores,
# }
# self.parent.assertListEqual(
# list(result["prediction_scores"].size()),
# [self.batch_size, self.seq_length, self.vocab_size])
# self.check_loss_output(result)
model
=
TFBertForMaskedLM
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
prediction_scores
,
=
model
(
inputs
)
result
=
{
"prediction_scores"
:
prediction_scores
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"prediction_scores"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
vocab_size
])
def
create_and_check_bert_for_next_sequence_prediction
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# model = BertForNextSentencePrediction(config=config)
# model.eval()
# loss, seq_relationship_score = model(input_ids, token_type_ids, input_mask, sequence_labels)
# result = {
# "loss": loss,
# "seq_relationship_score": seq_relationship_score,
# }
# self.parent.assertListEqual(
# list(result["seq_relationship_score"].size()),
# [self.batch_size, 2])
# self.check_loss_output(result)
model
=
TFBertForNextSentencePrediction
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
seq_relationship_score
,
=
model
(
inputs
)
result
=
{
"seq_relationship_score"
:
seq_relationship_score
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"seq_relationship_score"
].
shape
),
[
self
.
batch_size
,
2
])
def
create_and_check_bert_for_pretraining
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# model = BertForPreTraining(config=config)
# model.eval()
# loss, prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask, token_labels, sequence_labels)
# result = {
# "loss": loss,
# "prediction_scores": prediction_scores,
# "seq_relationship_score": seq_relationship_score,
# }
# self.parent.assertListEqual(
# list(result["prediction_scores"].size()),
# [self.batch_size, self.seq_length, self.vocab_size])
# self.parent.assertListEqual(
# list(result["seq_relationship_score"].size()),
# [self.batch_size, 2])
# self.check_loss_output(result)
model
=
TFBertForPreTraining
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
prediction_scores
,
seq_relationship_score
=
model
(
inputs
)
result
=
{
"prediction_scores"
:
prediction_scores
.
numpy
(),
"seq_relationship_score"
:
seq_relationship_score
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"prediction_scores"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
vocab_size
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"seq_relationship_score"
].
shape
),
[
self
.
batch_size
,
2
])
def
create_and_check_bert_for_question_answering
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# model = BertForQuestionAnswering(config=config)
# model.eval()
# loss, start_logits, end_logits = model(input_ids, token_type_ids, input_mask, sequence_labels, sequence_labels)
# result = {
# "loss": loss,
# "start_logits": start_logits,
# "end_logits": end_logits,
# }
# self.parent.assertListEqual(
# list(result["start_logits"].size()),
# [self.batch_size, self.seq_length])
# self.parent.assertListEqual(
# list(result["end_logits"].size()),
# [self.batch_size, self.seq_length])
# self.check_loss_output(result)
def
create_and_check_bert_for_sequence_classification
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
config
.
num_labels
=
self
.
num_labels
model
=
TFBertForSequenceClassification
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
logits
,
=
model
(
inputs
)
result
=
{
"logits"
:
logits
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
shape
),
[
self
.
batch_size
,
self
.
num_labels
])
def
create_and_check_bert_for_sequence_classification
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# config.num_labels = self.num_labels
# model = BertForSequenceClassification(config)
# model.eval()
# loss, logits = model(input_ids, token_type_ids, input_mask, sequence_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.num_labels])
# self.check_loss_output(result)
def
create_and_check_bert_for_multiple_choice
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
config
.
num_choices
=
self
.
num_choices
model
=
TFBertForMultipleChoice
(
config
=
config
)
multiple_choice_inputs_ids
=
tf
.
tile
(
tf
.
expand_dims
(
input_ids
,
1
),
(
1
,
self
.
num_choices
,
1
))
multiple_choice_input_mask
=
tf
.
tile
(
tf
.
expand_dims
(
input_mask
,
1
),
(
1
,
self
.
num_choices
,
1
))
multiple_choice_token_type_ids
=
tf
.
tile
(
tf
.
expand_dims
(
token_type_ids
,
1
),
(
1
,
self
.
num_choices
,
1
))
inputs
=
{
'input_ids'
:
multiple_choice_inputs_ids
,
'attention_mask'
:
multiple_choice_input_mask
,
'token_type_ids'
:
multiple_choice_token_type_ids
}
logits
,
=
model
(
inputs
)
result
=
{
"logits"
:
logits
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
shape
),
[
self
.
batch_size
,
self
.
num_choices
])
def
create_and_check_bert_for_token_classification
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# config.num_labels = self.num_labels
# model = BertForTokenClassification(config=config)
# model.eval()
# loss, logits = model(input_ids, token_type_ids, input_mask, token_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.seq_length, self.num_labels])
# self.check_loss_output(result)
config
.
num_labels
=
self
.
num_labels
model
=
TFBertForTokenClassification
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
logits
,
=
model
(
inputs
)
result
=
{
"logits"
:
logits
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
num_labels
])
def
create_and_check_bert_for_multiple_choice
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
pass
# config.num_choices = self.num_choices
# model = BertForMultipleChoice(config=config)
# model.eval()
# multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# loss, logits = model(multiple_choice_inputs_ids,
# multiple_choice_token_type_ids,
# multiple_choice_input_mask,
# choice_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.num_choices])
# self.check_loss_output(result)
def
create_and_check_bert_for_question_answering
(
self
,
config
,
input_ids
,
token_type_ids
,
input_mask
,
sequence_labels
,
token_labels
,
choice_labels
):
model
=
TFBertForQuestionAnswering
(
config
=
config
)
inputs
=
{
'input_ids'
:
input_ids
,
'attention_mask'
:
input_mask
,
'token_type_ids'
:
token_type_ids
}
start_logits
,
end_logits
=
model
(
inputs
)
result
=
{
"start_logits"
:
start_logits
.
numpy
(),
"end_logits"
:
end_logits
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"start_logits"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"end_logits"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
])
def
prepare_config_and_inputs_for_common
(
self
):
...
...
@@ -287,48 +277,39 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
def
test_config
(
self
):
self
.
config_tester
.
run_common_tests
()
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_bert_model
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_model
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_masked_lm
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_masked_lm
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_multiple_choice
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_multiple_choice
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_next_sequence_prediction
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_next_sequence_prediction
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_pretraining
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_pretraining
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_question_answering
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_question_answering
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_sequence_classification
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_sequence_classification
(
*
config_and_inputs
)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_for_token_classification
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_bert_for_token_classification
(
*
config_and_inputs
)
@
pytest
.
mark
.
slow
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/pytorch_transformers_test/"
for
model_name
in
list
(
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
...
...
pytorch_transformers/tests/modeling_tf_common_test.py
View file @
518307df
...
...
@@ -30,7 +30,7 @@ try:
from
pytorch_transformers
import
TFPreTrainedModel
# from pytorch_transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
p
ass
p
ytestmark
=
pytest
.
mark
.
skip
(
"Require TensorFlow"
)
def
_config_zero_init
(
config
):
...
...
@@ -50,7 +50,6 @@ class TFCommonTestCases:
test_pruning
=
True
test_resize_embeddings
=
True
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_initialization
(
self
):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
@@ -64,7 +63,6 @@ class TFCommonTestCases:
# msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_attention_outputs
(
self
):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
@@ -105,7 +103,6 @@ class TFCommonTestCases:
# self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_headmasking
(
self
):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
@@ -153,7 +150,6 @@ class TFCommonTestCases:
# attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_head_pruning
(
self
):
pass
# if not self.test_pruning:
...
...
@@ -181,7 +177,6 @@ class TFCommonTestCases:
# attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_hidden_states_output
(
self
):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
@@ -201,7 +196,6 @@ class TFCommonTestCases:
# [self.model_tester.seq_length, self.model_tester.hidden_size])
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_resize_tokens_embeddings
(
self
):
pass
# original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
@@ -238,7 +232,6 @@ class TFCommonTestCases:
# self.assertTrue(models_equal)
@
pytest
.
mark
.
skipif
(
'tensorflow'
not
in
sys
.
modules
,
reason
=
"requires TensorFlow"
)
def
test_tie_model_weights
(
self
):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
...
pytorch_transformers/tests/modeling_transfo_xl_test.py
View file @
518307df
...
...
@@ -21,17 +21,21 @@ import random
import
shutil
import
pytest
import
torch
from
pytorch_transformers
import
is_
torch
_available
from
pytorch_transformers
import
(
TransfoXLConfig
,
TransfoXLModel
,
TransfoXLLMHeadModel
)
from
pytorch_transformers.modeling_transfo_xl
import
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
try
:
import
torch
from
pytorch_transformers
import
(
TransfoXLConfig
,
TransfoXLModel
,
TransfoXLLMHeadModel
)
from
pytorch_transformers.modeling_transfo_xl
import
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
class
TransfoXLModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
TransfoXLModel
,
TransfoXLLMHeadModel
)
all_model_classes
=
(
TransfoXLModel
,
TransfoXLLMHeadModel
)
if
is_torch_available
()
else
()
test_pruning
=
False
test_torchscript
=
False
test_resize_embeddings
=
False
...
...
pytorch_transformers/tests/modeling_xlm_test.py
View file @
518307df
...
...
@@ -20,8 +20,14 @@ import unittest
import
shutil
import
pytest
from
pytorch_transformers
import
(
XLMConfig
,
XLMModel
,
XLMWithLMHeadModel
,
XLMForQuestionAnswering
,
XLMForSequenceClassification
)
from
pytorch_transformers.modeling_xlm
import
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers
import
is_torch_available
try
:
from
pytorch_transformers
import
(
XLMConfig
,
XLMModel
,
XLMWithLMHeadModel
,
XLMForQuestionAnswering
,
XLMForSequenceClassification
)
from
pytorch_transformers.modeling_xlm
import
XLM_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -29,9 +35,9 @@ from .configuration_common_test import ConfigTester
class
XLMModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
XLMModel
,
XLMWithLMHeadModel
,
XLMFor
QuestionAnswering
,
XLMForSequenceClassification
)
# , XLMForSequenceClassification, XLMForTokenClassification),
all_model_classes
=
(
XLMModel
,
XLMWithLMHeadModel
,
XLMForQuestionAnswering
,
XLMFor
SequenceClassification
)
if
is_torch_available
()
else
()
class
XLMModelTester
(
object
):
...
...
pytorch_transformers/tests/modeling_xlnet_test.py
View file @
518307df
...
...
@@ -23,10 +23,15 @@ import random
import
shutil
import
pytest
import
torch
from
pytorch_transformers
import
is_
torch
_available
from
pytorch_transformers
import
(
XLNetConfig
,
XLNetModel
,
XLNetLMHeadModel
,
XLNetForSequenceClassification
,
XLNetForQuestionAnswering
)
from
pytorch_transformers.modeling_xlnet
import
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
try
:
import
torch
from
pytorch_transformers
import
(
XLNetConfig
,
XLNetModel
,
XLNetLMHeadModel
,
XLNetForSequenceClassification
,
XLNetForQuestionAnswering
)
from
pytorch_transformers.modeling_xlnet
import
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
...
...
@@ -34,7 +39,7 @@ from .configuration_common_test import ConfigTester
class
XLNetModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
XLNetModel
,
XLNetLMHeadModel
,
XLNetForSequenceClassification
,
XLNetForQuestionAnswering
)
XLNetForSequenceClassification
,
XLNetForQuestionAnswering
)
if
is_torch_available
()
else
()
test_pruning
=
False
class
XLNetModelTester
(
object
):
...
...
pytorch_transformers/tests/optimization_test.py
View file @
518307df
...
...
@@ -18,11 +18,17 @@ from __future__ import print_function
import
unittest
import
os
import
pytest
import
torch
from
pytorch_transformers
import
is_
torch
_available
from
pytorch_transformers
import
(
AdamW
,
ConstantLRSchedule
,
WarmupConstantSchedule
,
try
:
import
torch
from
pytorch_transformers
import
(
AdamW
,
ConstantLRSchedule
,
WarmupConstantSchedule
,
WarmupCosineSchedule
,
WarmupCosineWithHardRestartsSchedule
,
WarmupLinearSchedule
)
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
from
.tokenization_tests_commons
import
TemporaryDirectory
...
...
@@ -71,8 +77,8 @@ class OptimizationTest(unittest.TestCase):
class
ScheduleInitTest
(
unittest
.
TestCase
):
m
=
torch
.
nn
.
Linear
(
50
,
50
)
optimizer
=
AdamW
(
m
.
parameters
(),
lr
=
10.
)
m
=
torch
.
nn
.
Linear
(
50
,
50
)
if
is_torch_available
()
else
None
optimizer
=
AdamW
(
m
.
parameters
(),
lr
=
10.
)
if
is_torch_available
()
else
None
num_steps
=
10
def
assertListAlmostEqual
(
self
,
list1
,
list2
,
tol
):
...
...
pytorch_transformers/tests/tokenization_auto_test.py
View file @
518307df
...
...
@@ -22,20 +22,19 @@ import pytest
import
logging
from
pytorch_transformers
import
AutoTokenizer
,
BertTokenizer
,
AutoTokenizer
,
GPT2Tokenizer
from
pytorch_transformers.modeling_bert
import
BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers.modeling_gpt2
import
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
from
pytorch_transformers
import
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
class
AutoTokenizerTest
(
unittest
.
TestCase
):
def
test_tokenizer_from_pretrained
(
self
):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
for
model_name
in
list
(
BERT_PRETRAINED_
MODEL
_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
BERT_PRETRAINED_
CONFIG
_ARCHIVE_MAP
.
keys
())[:
1
]:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
tokenizer
)
self
.
assertIsInstance
(
tokenizer
,
BertTokenizer
)
self
.
assertGreater
(
len
(
tokenizer
),
0
)
for
model_name
in
list
(
GPT2_PRETRAINED_
MODEL
_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
GPT2_PRETRAINED_
CONFIG
_ARCHIVE_MAP
.
keys
())[:
1
]:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
tokenizer
)
self
.
assertIsInstance
(
tokenizer
,
GPT2Tokenizer
)
...
...
pytorch_transformers/tests/tokenization_transfo_xl_test.py
View file @
518307df
...
...
@@ -16,15 +16,21 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import
os
import
unittest
import
pytest
from
io
import
open
from
pytorch_transformers
.tokenization_transfo_xl
import
TransfoXLTokenizer
,
VOCAB_FILES_NAMES
from
pytorch_transformers
import
is_torch_available
from
.
tokenization_tests_commons
import
CommonTestCases
try
:
from
pytorch_transformers.tokenization_transfo_xl
import
TransfoXLTokenizer
,
VOCAB_FILES_NAMES
except
ImportError
:
pytestmark
=
pytest
.
mark
.
skip
(
"Require Torch"
)
# TODO: untangle Transfo-XL tokenizer from torch.load and torch.save
from
.tokenization_tests_commons
import
CommonTestCases
class
TransfoXLTokenizationTest
(
CommonTestCases
.
CommonTokenizerTester
):
tokenizer_class
=
TransfoXLTokenizer
tokenizer_class
=
TransfoXLTokenizer
if
is_torch_available
()
else
None
def
setUp
(
self
):
super
(
TransfoXLTokenizationTest
,
self
).
setUp
()
...
...
pytorch_transformers/tokenization_transfo_xl.py
View file @
518307df
...
...
@@ -26,16 +26,20 @@ import sys
from
collections
import
Counter
,
OrderedDict
from
io
import
open
import
torch
import
numpy
as
np
from
.file_utils
import
cached_path
from
.tokenization_utils
import
PreTrainedTokenizer
if
sys
.
version_info
[
0
]
==
2
:
import
cPickle
as
pickle
else
:
import
pickle
try
:
import
torch
except
ImportError
:
pass
# if sys.version_info[0] == 2:
# import cPickle as pickle
# else:
# import pickle
logger
=
logging
.
getLogger
(
__name__
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment