chenpangpang / transformers · Commits

Unverified commit a52d56c8, authored Dec 14, 2019 by Thomas Wolf, committed via GitHub Dec 14, 2019

Merge branch 'master' into cleanup-configs

Parents: 8ade2040, e92bcb7e
Changes: 46 (showing 20 changed files with 2274 additions and 107 deletions, +2274 / -107)
Files shown on this page of the diff:

  transformers/hf_api.py                              +2    -1
  transformers/modeling_auto.py                       +11   -2
  transformers/modeling_encoder_decoder.py            +1    -3
  transformers/modeling_t5.py                         +886  -0
  transformers/modeling_tf_auto.py                    +11   -2
  transformers/modeling_tf_pytorch_utils.py           +7    -3
  transformers/modeling_tf_t5.py                      +775  -0
  transformers/modeling_tf_utils.py                   +2    -4
  transformers/modeling_tf_xlm.py                     +1    -1
  transformers/modeling_utils.py                      +12   -6
  transformers/modeling_xlm.py                        +11   -1
  transformers/tests/fixtures/empty.txt               +0    -0
  transformers/tests/hf_api_test.py                   +31   -13
  transformers/tests/modeling_common_test.py          +85   -38
  transformers/tests/modeling_t5_test.py              +185  -0
  transformers/tests/modeling_tf_common_test.py       +79   -31
  transformers/tests/modeling_tf_t5_test.py           +172  -0
  transformers/tests/modeling_tf_transfo_xl_test.py   +1    -1
  transformers/tests/modeling_transfo_xl_test.py      +1    -1
  transformers/tests/tokenization_bert_test.py        +1    -0
transformers/hf_api.py

@@ -131,8 +131,9 @@ class HfApi:
             # the client still has to specify it when uploading the file.
             with open(filepath, "rb") as f:
                 pf = TqdmProgressFileReader(f)
-                r = requests.put(urls.write, data=f, headers={
+                data = f if pf.total_size > 0 else ""
+                r = requests.put(urls.write, data=data, headers={
                     "content-type": urls.type,
                 })
                 r.raise_for_status()
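A minimal, hedged sketch of the upload pattern this hunk settles on, written outside the HfApi class: the presigned URL and content type are placeholders, and os.path.getsize stands in for the TqdmProgressFileReader total_size check.

import os
import requests

def put_file(filepath, presigned_url, content_type):
    # Send the open file as the request body, but fall back to an empty string
    # for zero-byte files so the body length stays well defined.
    with open(filepath, "rb") as f:
        data = f if os.path.getsize(filepath) > 0 else ""
        r = requests.put(presigned_url, data=data, headers={"content-type": content_type})
    r.raise_for_status()
    return r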
transformers/modeling_auto.py

@@ -29,6 +29,7 @@ from .modeling_roberta import RobertaModel, RobertaForMaskedLM, RobertaForSequen
 from .modeling_distilbert import DistilBertModel, DistilBertForQuestionAnswering, DistilBertForMaskedLM, DistilBertForSequenceClassification
 from .modeling_camembert import CamembertModel, CamembertForMaskedLM, CamembertForSequenceClassification, CamembertForMultipleChoice
 from .modeling_albert import AlbertModel, AlbertForMaskedLM, AlbertForSequenceClassification, AlbertForQuestionAnswering
+from .modeling_t5 import T5Model, T5WithLMHeadModel
 from .modeling_utils import PreTrainedModel, SequenceSummary

@@ -49,6 +50,7 @@ class AutoModel(object):
         The base model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: T5Model (T5 model)
             - contains `distilbert`: DistilBertModel (DistilBERT model)
             - contains `albert`: AlbertModel (ALBERT model)
             - contains `camembert`: CamembertModel (CamemBERT model)

@@ -74,6 +76,7 @@ class AutoModel(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: T5Model (T5 model)
             - contains `distilbert`: DistilBertModel (DistilBERT model)
             - contains `albert`: AlbertModel (ALBERT model)
             - contains `camembert`: CamembertModel (CamemBERT model)

@@ -146,7 +149,9 @@ class AutoModel(object):
             model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

         """
-        if 'distilbert' in pretrained_model_name_or_path:
+        if 't5' in pretrained_model_name_or_path:
+            return T5Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'distilbert' in pretrained_model_name_or_path:
             return DistilBertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'albert' in pretrained_model_name_or_path:
             return AlbertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)

@@ -185,6 +190,7 @@ class AutoModelWithLMHead(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: T5ModelWithLMHead (T5 model)
             - contains `distilbert`: DistilBertForMaskedLM (DistilBERT model)
             - contains `albert`: AlbertForMaskedLM (ALBERT model)
             - contains `camembert`: CamembertForMaskedLM (CamemBERT model)

@@ -213,6 +219,7 @@ class AutoModelWithLMHead(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: T5ModelWithLMHead (T5 model)
             - contains `distilbert`: DistilBertForMaskedLM (DistilBERT model)
             - contains `albert`: AlbertForMaskedLM (ALBERT model)
             - contains `camembert`: CamembertForMaskedLM (CamemBERT model)

@@ -284,7 +291,9 @@ class AutoModelWithLMHead(object):
             model = AutoModelWithLMHead.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

         """
-        if 'distilbert' in pretrained_model_name_or_path:
+        if 't5' in pretrained_model_name_or_path:
+            return T5WithLMHeadModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'distilbert' in pretrained_model_name_or_path:
             return DistilBertForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'albert' in pretrained_model_name_or_path:
             return AlbertForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
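With this routing in place, a T5 checkpoint can be loaded through the Auto classes by name. A hedged usage sketch; 't5-small' is used purely as an illustrative identifier:

from transformers import AutoModel, AutoModelWithLMHead

# Any checkpoint name containing "t5" is now dispatched to the T5 classes.
model = AutoModel.from_pretrained('t5-small')               # resolves to T5Model
lm_model = AutoModelWithLMHead.from_pretrained('t5-small')  # resolves to T5WithLMHeadModel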
transformers/modeling_encoder_decoder.py

@@ -219,9 +219,7 @@ class PreTrainedEncoderDecoder(nn.Module):
         encoder_hidden_states = kwargs_encoder.pop("hidden_states", None)
         if encoder_hidden_states is None:
             encoder_outputs = self.encoder(encoder_input_ids, **kwargs_encoder)
-            encoder_hidden_states = encoder_outputs[
-                0
-            ]  # output the last layer hidden state
+            encoder_hidden_states = encoder_outputs[0]
         else:
             encoder_outputs = ()
transformers/modeling_t5.py (new file, 0 → 100644, +886 lines)

This diff is collapsed in the page view.
transformers/modeling_tf_auto.py

@@ -27,6 +27,7 @@ from .modeling_tf_xlm import TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceC
 from .modeling_tf_roberta import TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification
 from .modeling_tf_distilbert import TFDistilBertModel, TFDistilBertForQuestionAnswering, TFDistilBertForMaskedLM, TFDistilBertForSequenceClassification
 from .modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel
+from .modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel
 from .file_utils import add_start_docstrings

@@ -45,6 +46,7 @@ class TFAutoModel(object):
         The base model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: TFT5Model (T5 model)
             - contains `distilbert`: TFDistilBertModel (DistilBERT model)
             - contains `roberta`: TFRobertaModel (RoBERTa model)
             - contains `bert`: TFBertModel (Bert model)

@@ -68,6 +70,7 @@ class TFAutoModel(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: TFT5Model (T5 model)
             - contains `distilbert`: TFDistilBertModel (DistilBERT model)
             - contains `roberta`: TFRobertaModel (RoBERTa model)
             - contains `bert`: TFTFBertModel (Bert model)

@@ -137,7 +140,9 @@ class TFAutoModel(object):
             model = TFAutoModel.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)

         """
-        if 'distilbert' in pretrained_model_name_or_path:
+        if 't5' in pretrained_model_name_or_path:
+            return TFT5Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'distilbert' in pretrained_model_name_or_path:
             return TFDistilBertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'roberta' in pretrained_model_name_or_path:
             return TFRobertaModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)

@@ -173,6 +178,7 @@ class TFAutoModelWithLMHead(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: TFT5WithLMHeadModel (T5 model)
             - contains `distilbert`: TFDistilBertForMaskedLM (DistilBERT model)
             - contains `roberta`: TFRobertaForMaskedLM (RoBERTa model)
             - contains `bert`: TFBertForMaskedLM (Bert model)

@@ -199,6 +205,7 @@ class TFAutoModelWithLMHead(object):
         The model class to instantiate is selected as the first pattern matching
         in the `pretrained_model_name_or_path` string (in the following order):
+            - contains `t5`: TFT5WithLMHeadModel (T5 model)
             - contains `distilbert`: TFDistilBertForMaskedLM (DistilBERT model)
             - contains `roberta`: TFRobertaForMaskedLM (RoBERTa model)
             - contains `bert`: TFBertForMaskedLM (Bert model)

@@ -269,7 +276,9 @@ class TFAutoModelWithLMHead(object):
             model = TFAutoModelWithLMHead.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config)

         """
-        if 'distilbert' in pretrained_model_name_or_path:
+        if 't5' in pretrained_model_name_or_path:
+            return TFT5WithLMHeadModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'distilbert' in pretrained_model_name_or_path:
             return TFDistilBertForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'roberta' in pretrained_model_name_or_path:
             return TFRobertaForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
transformers/modeling_tf_pytorch_utils.py

@@ -78,6 +78,7 @@ def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path, tf_i
     logger.info("Loading PyTorch weights from {}".format(pt_path))
     pt_state_dict = torch.load(pt_path, map_location='cpu')
+    logger.info("PyTorch checkpoint contains {:,} parameters".format(sum(t.numel() for t in pt_state_dict.values())))

     return load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=tf_inputs, allow_missing_keys=allow_missing_keys)

@@ -134,7 +135,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
     start_prefix_to_remove = tf_model.base_model_prefix + '.'

     symbolic_weights = tf_model.trainable_weights + tf_model.non_trainable_weights
+    tf_loaded_numel = 0
     weight_value_tuples = []
     all_pytorch_weights = set(list(pt_state_dict.keys()))
     for symbolic_weight in symbolic_weights:

@@ -159,7 +160,8 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
                 e.args += (symbolic_weight.shape, array.shape)
                 raise e

-        logger.info("Initialize TF weight {}".format(symbolic_weight.name))
+        tf_loaded_numel += array.size
+        # logger.warning("Initialize TF weight {}".format(symbolic_weight.name))

         weight_value_tuples.append((symbolic_weight, array))
         all_pytorch_weights.discard(name)

@@ -169,6 +171,8 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a
     if tf_inputs is not None:
         tfo = tf_model(tf_inputs, training=False)  # Make sure restore ops are run

+    logger.info("Loaded {:,} parameters in the TF 2.0 model.".format(tf_loaded_numel))
+
     logger.info("Weights or buffers not loaded from PyTorch model: {}".format(all_pytorch_weights))

     return tf_model

@@ -272,7 +276,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F
             e.args += (pt_weight.shape, array.shape)
             raise e

-        logger.info("Initialize PyTorch weight {}".format(pt_weight_name))
+        # logger.warning("Initialize PyTorch weight {}".format(pt_weight_name))

         new_pt_params_dict[pt_weight_name] = torch.from_numpy(array)
         loaded_pt_weights_data_ptr[pt_weight.data_ptr()] = torch.from_numpy(array)
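The new log line counts parameters straight from the checkpoint's state dict. A small, hedged sketch of the same computation on an arbitrary PyTorch module standing in for a loaded checkpoint:

import torch

linear = torch.nn.Linear(4, 3)                       # stand-in for a loaded checkpoint
pt_state_dict = linear.state_dict()
num_params = sum(t.numel() for t in pt_state_dict.values())
print("PyTorch checkpoint contains {:,} parameters".format(num_params))  # 4*3 weights + 3 biases = 15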
transformers/modeling_tf_t5.py (new file, 0 → 100644, +775 lines)

This diff is collapsed in the page view.
transformers/modeling_tf_utils.py

@@ -24,14 +24,12 @@ import os
 import tensorflow as tf

 from .configuration_utils import PretrainedConfig
 from .file_utils import (TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME,
-                         cached_path, hf_bucket_url, is_remote_url)
+                         DUMMY_INPUTS, cached_path, hf_bucket_url, is_remote_url)
 from .modeling_tf_pytorch_utils import load_pytorch_checkpoint_in_tf2_model

 logger = logging.getLogger(__name__)

-DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]
-

 class TFPreTrainedModel(tf.keras.Model):
     r""" Base class for all TF models.

@@ -60,7 +58,7 @@ class TFPreTrainedModel(tf.keras.Model):
         Returns:
             tf.Tensor with dummy inputs
         """
-        return tf.constant(DUMMY_INPUTS)
+        return {'input_ids': tf.constant(DUMMY_INPUTS)}

     def __init__(self, config, *inputs, **kwargs):
         super(TFPreTrainedModel, self).__init__(*inputs, **kwargs)
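dummy_inputs now returns a dict keyed by input name rather than a bare tensor, so models whose call() takes several named inputs (such as the new encoder-decoder T5) can be built with the same generic forward pass. A hedged sketch with a made-up TFToyModel, not library code:

import tensorflow as tf

DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]

class TFToyModel(tf.keras.Model):
    def __init__(self):
        super(TFToyModel, self).__init__()
        self.embed = tf.keras.layers.Embedding(100, 8)

    @property
    def dummy_inputs(self):
        # Keyed by input name so call() can pick out exactly what it needs.
        return {'input_ids': tf.constant(DUMMY_INPUTS)}

    def call(self, inputs, training=False):
        return self.embed(inputs['input_ids'])

model = TFToyModel()
_ = model(model.dummy_inputs, training=False)  # one dummy pass builds the weights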
transformers/modeling_tf_xlm.py

@@ -460,7 +460,7 @@ class TFXLMPreTrainedModel(TFPreTrainedModel):
             langs_list = tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]])
         else:
             langs_list = None
-        return [inputs_list, attns_list, langs_list]
+        return {'input_ids': inputs_list, 'attention_mask': attns_list, 'langs': langs_list}


 XLM_START_DOCSTRING = r""" The XLM model was proposed in
transformers/modeling_utils.py

@@ -31,12 +31,11 @@ from torch.nn import CrossEntropyLoss
 from torch.nn import functional as F

 from .configuration_utils import PretrainedConfig
 from .file_utils import (TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME,
-                         cached_path, hf_bucket_url, is_remote_url)
+                         DUMMY_INPUTS, cached_path, hf_bucket_url, is_remote_url)

 logger = logging.getLogger(__name__)

 try:
     from torch.nn import Identity
 except ImportError:

@@ -72,6 +71,15 @@ class PreTrainedModel(nn.Module):
     load_tf_weights = lambda model, config, path: None
     base_model_prefix = ""

+    @property
+    def dummy_inputs(self):
+        """ Dummy inputs to do a forward pass in the network.
+
+        Returns:
+            torch.Tensor with dummy inputs
+        """
+        return {'input_ids': torch.tensor(DUMMY_INPUTS)}
+
     def __init__(self, config, *inputs, **kwargs):
         super(PreTrainedModel, self).__init__()
         if not isinstance(config, PretrainedConfig):

@@ -161,8 +169,7 @@ class PreTrainedModel(nn.Module):
             base_model.vocab_size = new_num_tokens

         # Tie weights again if needed
-        if hasattr(self, 'tie_weights'):
-            self.tie_weights()
+        self.tie_weights()

         return model_embeds

@@ -478,8 +485,7 @@ class PreTrainedModel(nn.Module):
                 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
                                    model.__class__.__name__, "\n\t".join(error_msgs)))

-        if hasattr(model, 'tie_weights'):
-            model.tie_weights()  # make sure word embedding weights are still tied if needed
+        model.tie_weights()  # make sure word embedding weights are still tied

         # Set model in evaluation mode to desactivate DropOut modules by default
         model.eval()
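The PyTorch base class gains the matching hook; because the dict keys line up with forward()'s argument names, the dummy batch can simply be splatted into a call. A short hedged sketch with an illustrative toy module, not library code:

import torch

DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]

class ToyModel(torch.nn.Module):
    def __init__(self):
        super(ToyModel, self).__init__()
        self.embed = torch.nn.Embedding(100, 8)

    @property
    def dummy_inputs(self):
        return {'input_ids': torch.tensor(DUMMY_INPUTS)}

    def forward(self, input_ids=None):
        return self.embed(input_ids)

model = ToyModel()
out = model(**model.dummy_inputs)   # keys match the forward() argument names
print(out.shape)                    # torch.Size([3, 5, 8])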
transformers/modeling_xlm.py

@@ -227,6 +227,16 @@ class XLMPreTrainedModel(PreTrainedModel):
     def __init__(self, *inputs, **kwargs):
         super(XLMPreTrainedModel, self).__init__(*inputs, **kwargs)

+    @property
+    def dummy_inputs(self):
+        inputs_list = torch.tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]])
+        attns_list = torch.tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]])
+        if self.config.use_lang_emb and self.config.n_langs > 1:
+            langs_list = torch.tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]])
+        else:
+            langs_list = None
+        return {'input_ids': inputs_list, 'attention_mask': attns_list, 'langs': langs_list}
+
     def _init_weights(self, module):
         """ Initialize the weights. """
         if isinstance(module, nn.Embedding):

@@ -646,7 +656,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
                                        langs=langs,
                                        token_type_ids=token_type_ids,
                                        position_ids=position_ids,
                                        lengths=lengths,
                                        cache=cache,
                                        head_mask=head_mask,
                                        inputs_embeds=inputs_embeds)
transformers/tests/fixtures/empty.txt (new empty file, 0 → 100644)
transformers/tests/hf_api_test.py

@@ -15,18 +15,30 @@
 from __future__ import absolute_import, division, print_function

 import os
-import six
 import time
 import unittest

-from transformers.hf_api import HfApi, S3Obj, PresignedUrl, HfFolder, HTTPError
+import requests
+import six
+
+from transformers.hf_api import HfApi, HfFolder, HTTPError, PresignedUrl, S3Obj


 USER = "__DUMMY_TRANSFORMERS_USER__"
 PASS = "__DUMMY_TRANSFORMERS_PASS__"
-FILE_KEY = "Test-{}.txt".format(int(time.time()))
-FILE_PATH = os.path.join(
-    os.path.dirname(os.path.abspath(__file__)), "fixtures/input.txt"
-)
+FILES = [
+    (
+        "Test-{}.txt".format(int(time.time())),
+        os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/input.txt")
+    ),
+    (
+        "yoyo {}.txt".format(int(time.time())),  # space is intentional
+        os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/empty.txt")
+    ),
+]

@@ -57,15 +69,21 @@ class HfApiEndpointsTest(HfApiCommonTest):
         self.assertEqual(user, USER)

     def test_presign(self):
-        urls = self._api.presign(token=self._token, filename=FILE_KEY)
-        self.assertIsInstance(urls, PresignedUrl)
-        self.assertEqual(urls.type, "text/plain")
+        for FILE_KEY, FILE_PATH in FILES:
+            urls = self._api.presign(token=self._token, filename=FILE_KEY)
+            self.assertIsInstance(urls, PresignedUrl)
+            self.assertEqual(urls.type, "text/plain")

     def test_presign_and_upload(self):
-        access_url = self._api.presign_and_upload(
-            token=self._token, filename=FILE_KEY, filepath=FILE_PATH
-        )
-        self.assertIsInstance(access_url, six.string_types)
+        for FILE_KEY, FILE_PATH in FILES:
+            access_url = self._api.presign_and_upload(
+                token=self._token, filename=FILE_KEY, filepath=FILE_PATH
+            )
+            self.assertIsInstance(access_url, six.string_types)
+            with open(FILE_PATH, 'r') as f:
+                body = f.read()
+            r = requests.get(access_url)
+            self.assertEqual(r.text, body)

     def test_list_objs(self):
         objs = self._api.list_objs(token=self._token)
transformers/tests/modeling_common_test.py

@@ -58,7 +58,7 @@ else:
 def _config_zero_init(config):
     configs_no_init = copy.deepcopy(config)
     for key in configs_no_init.__dict__.keys():
-        if '_range' in key or '_std' in key:
+        if '_range' in key or '_std' in key or 'initializer_factor' in key:
             setattr(configs_no_init, key, 0.0)
     return configs_no_init

@@ -73,6 +73,7 @@ class CommonTestCases:
         test_pruning = True
         test_resize_embeddings = True
         test_head_masking = True
+        is_encoder_decoder = False

         def test_save_load(self):
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -83,6 +84,8 @@ class CommonTestCases:
                 model.eval()
                 with torch.no_grad():
                     outputs = model(**inputs_dict)
+                out_2 = outputs[0].numpy()
+                out_2[np.isnan(out_2)] = 0

                 with TemporaryDirectory() as tmpdirname:
                     model.save_pretrained(tmpdirname)

@@ -93,9 +96,7 @@ class CommonTestCases:
                 # Make sure we don't have nans
                 out_1 = after_outputs[0].cpu().numpy()
-                out_2 = outputs[0].cpu().numpy()
-                out_1[np.isnan(out_1)] = 0
+                out_1 = out_1[~np.isnan(out_1)]
+                out_2 = out_2[~np.isnan(out_2)]
                 max_diff = np.amax(np.abs(out_1 - out_2))
                 self.assertLessEqual(max_diff, 1e-5)

@@ -117,20 +118,32 @@ class CommonTestCases:
                 model = model_class(config)
                 model.to(torch_device)
                 model.eval()
-                first, second = model(inputs_dict["input_ids"])[0], model(inputs_dict["input_ids"])[0]
-                self.assertEqual(first.ne(second).sum().item(), 0)
+                with torch.no_grad():
+                    first = model(**inputs_dict)[0]
+                    second = model(**inputs_dict)[0]
+                out_1 = first.cpu().numpy()
+                out_2 = second.cpu().numpy()
+                out_1 = out_1[~np.isnan(out_1)]
+                out_2 = out_2[~np.isnan(out_2)]
+                max_diff = np.amax(np.abs(out_1 - out_2))
+                self.assertLessEqual(max_diff, 1e-5)

         def test_attention_outputs(self):
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+            decoder_seq_length = self.model_tester.decoder_seq_length if hasattr(self.model_tester, 'decoder_seq_length') else self.model_tester.seq_length
+            encoder_seq_length = self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length
+            decoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else decoder_seq_length
+            encoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else encoder_seq_length

             for model_class in self.all_model_classes:
                 config.output_attentions = True
                 config.output_hidden_states = False
                 model = model_class(config)
                 model.to(torch_device)
                 model.eval()
-                outputs = model(**inputs_dict)
+                with torch.no_grad():
+                    outputs = model(**inputs_dict)
                 attentions = outputs[-1]
                 self.assertEqual(model.config.output_attentions, True)
                 self.assertEqual(model.config.output_hidden_states, False)

@@ -138,28 +151,42 @@ class CommonTestCases:
                 self.assertListEqual(
                     list(attentions[0].shape[-3:]),
                     [self.model_tester.num_attention_heads,
-                     self.model_tester.seq_length,
-                     self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
+                     encoder_seq_length,
+                     encoder_key_length])
                 out_len = len(outputs)

+                if self.is_encoder_decoder:
+                    self.assertEqual(out_len % 2, 0)
+                    decoder_attentions = outputs[(out_len // 2) - 1]
+                    self.assertEqual(model.config.output_attentions, True)
+                    self.assertEqual(model.config.output_hidden_states, False)
+                    self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
+                    self.assertListEqual(
+                        list(decoder_attentions[0].shape[-3:]),
+                        [self.model_tester.num_attention_heads,
+                         decoder_seq_length,
+                         decoder_key_length])
+
                 # Check attention is always last and order is fine
                 config.output_attentions = True
                 config.output_hidden_states = True
                 model = model_class(config)
                 model.to(torch_device)
                 model.eval()
-                outputs = model(**inputs_dict)
-                self.assertEqual(out_len + 1, len(outputs))
+                with torch.no_grad():
+                    outputs = model(**inputs_dict)
+                self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
                 self.assertEqual(model.config.output_attentions, True)
                 self.assertEqual(model.config.output_hidden_states, True)

-                attentions = outputs[-1]
-                self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
+                self_attentions = outputs[-1]
+                self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
                 self.assertListEqual(
-                    list(attentions[0].shape[-3:]),
+                    list(self_attentions[0].shape[-3:]),
                     [self.model_tester.num_attention_heads,
-                     self.model_tester.seq_length,
-                     self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
+                     encoder_seq_length,
+                     encoder_key_length])

         def test_torchscript(self):
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -223,7 +250,6 @@ class CommonTestCases:
             self.assertTrue(models_equal)

         def test_headmasking(self):
             if not self.test_head_masking:
                 return

@@ -278,7 +304,6 @@ class CommonTestCases:
             self.assertNotEqual(
                 attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)

         def test_head_pruning(self):
             if not self.test_pruning:
                 return

@@ -297,7 +322,8 @@ class CommonTestCases:
             heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
                               -1: [0]}
             model.prune_heads(heads_to_prune)
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)

             attentions = outputs[-1]

@@ -333,7 +359,8 @@ class CommonTestCases:
             model = model_class.from_pretrained(directory)
             model.to(torch_device)
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)
             attentions = outputs[-1]
             self.assertEqual(attentions[0].shape[-3], 1)
             self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)

@@ -362,7 +389,8 @@ class CommonTestCases:
             model.to(torch_device)
             model.eval()
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)
             attentions = outputs[-1]
             self.assertEqual(attentions[0].shape[-3], 1)

@@ -389,7 +417,8 @@ class CommonTestCases:
             model.to(torch_device)
             model.eval()
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)
             attentions = outputs[-1]
             self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)

@@ -406,7 +435,8 @@ class CommonTestCases:
             model.to(torch_device)
             shutil.rmtree(directory)
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)
             attentions = outputs[-1]
             self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)

@@ -417,7 +447,8 @@ class CommonTestCases:
             heads_to_prune = {0: [0], 2: [1, 2]}
             model.prune_heads(heads_to_prune)
-            outputs = model(**inputs_dict)
+            with torch.no_grad():
+                outputs = model(**inputs_dict)
             attentions = outputs[-1]
             self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)

@@ -427,7 +458,6 @@ class CommonTestCases:
             self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2], 2: [1, 2]})

         def test_hidden_states_output(self):
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -437,14 +467,16 @@ class CommonTestCases:
                 model = model_class(config)
                 model.to(torch_device)
                 model.eval()
-                outputs = model(**inputs_dict)
+                with torch.no_grad():
+                    outputs = model(**inputs_dict)
                 hidden_states = outputs[-1]
                 self.assertEqual(model.config.output_attentions, False)
                 self.assertEqual(model.config.output_hidden_states, True)
                 self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
                 self.assertListEqual(
                     list(hidden_states[0].shape[-2:]),
-                    [self.model_tester.seq_length, self.model_tester.hidden_size])
+                    [self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length,
+                     self.model_tester.hidden_size])

         def test_resize_tokens_embeddings(self):
             original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

@@ -550,8 +582,14 @@ class CommonTestCases:
         def test_inputs_embeds(self):
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            input_ids = inputs_dict["input_ids"]
-            del inputs_dict["input_ids"]
+            if not self.is_encoder_decoder:
+                input_ids = inputs_dict["input_ids"]
+                del inputs_dict["input_ids"]
+            else:
+                encoder_input_ids = inputs_dict["encoder_input_ids"]
+                decoder_input_ids = inputs_dict["decoder_input_ids"]
+                del inputs_dict["encoder_input_ids"]
+                del inputs_dict["decoder_input_ids"]

             for model_class in self.all_model_classes:
                 model = model_class(config)

@@ -559,9 +597,14 @@ class CommonTestCases:
                 model.eval()

                 wte = model.get_input_embeddings()
-                inputs_dict["inputs_embeds"] = wte(input_ids)
-                outputs = model(**inputs_dict)
+                if not self.is_encoder_decoder:
+                    inputs_dict["inputs_embeds"] = wte(input_ids)
+                else:
+                    inputs_dict["encoder_inputs_embeds"] = wte(encoder_input_ids)
+                    inputs_dict["decoder_inputs_embeds"] = wte(decoder_input_ids)
+                with torch.no_grad():
+                    outputs = model(**inputs_dict)

     class GPTModelTester(CommonModelTester):

@@ -649,9 +692,10 @@ class CommonTestCases:
             model.to(torch_device)
             model.eval()

-            outputs = model(input_ids, position_ids, token_type_ids)
-            outputs = model(input_ids, position_ids)
-            outputs = model(input_ids)
+            with torch.no_grad():
+                outputs = model(input_ids, position_ids, token_type_ids)
+                outputs = model(input_ids, position_ids)
+                outputs = model(input_ids)

             hidden_state = outputs[0]
             self.parent.assertListEqual(

@@ -664,7 +708,8 @@ class CommonTestCases:
             model = self.lm_head_model_class(config)
             model.to(torch_device)
             model.eval()
-            outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
+            with torch.no_grad():
+                outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
             loss, lm_logits = outputs[:2]

             total_voc = self.vocab_size

@@ -681,7 +726,8 @@ class CommonTestCases:
             model = model_class(config)
             model.to(torch_device)
             model.eval()
-            outputs = model(input_ids)
+            with torch.no_grad():
+                outputs = model(input_ids)
             presents = outputs[-1]
             self.parent.assertEqual(self.num_hidden_layers, len(presents))
             self.parent.assertListEqual(

@@ -694,7 +740,8 @@ class CommonTestCases:
             model = self.double_head_model_class(config)
             model.to(torch_device)
             model.eval()
-            outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
-                            token_type_ids=token_type_ids, position_ids=position_ids)
+            with torch.no_grad():
+                outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
+                                token_type_ids=token_type_ids, position_ids=position_ids)
             lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
             loss = [lm_loss, mc_loss]
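Two changes recur throughout this file: forward passes are wrapped in torch.no_grad(), and outputs are compared only where neither side is NaN. A hedged sketch of that comparison pattern on plain numpy arrays (the values are made up; in the tests the two outputs carry NaNs in the same positions, so the filtered arrays stay aligned):

import numpy as np

out_1 = np.array([1.0, np.nan, 3.0])
out_2 = np.array([1.0, np.nan, 3.0 + 1e-6])

# Drop NaN entries before taking the maximum absolute difference,
# mirroring out_1 = out_1[~np.isnan(out_1)] in the tests.
out_1 = out_1[~np.isnan(out_1)]
out_2 = out_2[~np.isnan(out_2)]
max_diff = np.amax(np.abs(out_1 - out_2))
assert max_diff <= 1e-5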
transformers/tests/modeling_t5_test.py (new file, 0 → 100644)
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
import
shutil
from
transformers
import
is_torch_available
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
,
floats_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
if
is_torch_available
():
from
transformers
import
(
T5Config
,
T5Model
,
T5WithLMHeadModel
)
from
transformers.modeling_t5
import
T5_PRETRAINED_MODEL_ARCHIVE_MAP
@
require_torch
class
T5ModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
T5Model
,
T5WithLMHeadModel
)
if
is_torch_available
()
else
()
test_pruning
=
False
test_torchscript
=
False
test_resize_embeddings
=
False
is_encoder_decoder
=
True
class
T5ModelTester
(
object
):
def
__init__
(
self
,
parent
,
batch_size
=
13
,
encoder_seq_length
=
7
,
decoder_seq_length
=
9
,
is_training
=
True
,
use_attention_mask
=
True
,
use_labels
=
True
,
vocab_size
=
99
,
n_positions
=
14
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
d_ff
=
37
,
relative_attention_num_buckets
=
8
,
dropout_rate
=
0.1
,
initializer_factor
=
0.002
,
scope
=
None
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
encoder_seq_length
=
encoder_seq_length
self
.
decoder_seq_length
=
decoder_seq_length
self
.
is_training
=
is_training
self
.
use_attention_mask
=
use_attention_mask
self
.
use_labels
=
use_labels
self
.
vocab_size
=
vocab_size
self
.
n_positions
=
n_positions
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
d_ff
=
d_ff
self
.
relative_attention_num_buckets
=
relative_attention_num_buckets
self
.
dropout_rate
=
dropout_rate
self
.
initializer_factor
=
initializer_factor
self
.
scope
=
scope
def
prepare_config_and_inputs
(
self
):
encoder_input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
encoder_seq_length
],
self
.
vocab_size
)
decoder_input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
self
.
vocab_size
)
encoder_attention_mask
=
None
decoder_attention_mask
=
None
if
self
.
use_attention_mask
:
encoder_attention_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
encoder_seq_length
],
vocab_size
=
2
)
decoder_attention_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
vocab_size
=
2
)
decoder_lm_labels
=
None
if
self
.
use_labels
:
decoder_lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
self
.
vocab_size
)
config
=
T5Config
(
vocab_size_or_config_json_file
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
d_model
=
self
.
hidden_size
,
d_ff
=
self
.
d_ff
,
d_kv
=
self
.
hidden_size
//
self
.
num_attention_heads
,
num_layers
=
self
.
num_hidden_layers
,
num_heads
=
self
.
num_attention_heads
,
relative_attention_num_buckets
=
self
.
relative_attention_num_buckets
,
dropout_rate
=
self
.
dropout_rate
,
initializer_factor
=
self
.
initializer_factor
)
return
(
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
)
def
check_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_and_check_t5_model
(
self
,
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
):
model
=
T5Model
(
config
=
config
)
model
.
eval
()
decoder_output
,
encoder_output
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
,
encoder_attention_mask
=
encoder_attention_mask
,
decoder_attention_mask
=
decoder_attention_mask
)
decoder_output
,
encoder_output
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
)
result
=
{
"encoder_output"
:
encoder_output
,
"decoder_output"
:
decoder_output
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"encoder_output"
].
size
()),
[
self
.
batch_size
,
self
.
encoder_seq_length
,
self
.
hidden_size
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"decoder_output"
].
size
()),
[
self
.
batch_size
,
self
.
decoder_seq_length
,
self
.
hidden_size
])
def
create_and_check_t5_with_lm_head
(
self
,
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
):
model
=
T5WithLMHeadModel
(
config
=
config
)
model
.
eval
()
outputs
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
,
decoder_attention_mask
=
decoder_attention_mask
,
decoder_lm_labels
=
decoder_lm_labels
)
loss
,
prediction_scores
=
outputs
[
0
],
outputs
[
1
]
result
=
{
"loss"
:
loss
,
"prediction_scores"
:
prediction_scores
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"prediction_scores"
].
size
()),
[
self
.
batch_size
,
self
.
decoder_seq_length
,
self
.
vocab_size
])
self
.
check_loss_output
(
result
)
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
)
=
config_and_inputs
inputs_dict
=
{
'encoder_input_ids'
:
encoder_input_ids
,
'decoder_input_ids'
:
decoder_input_ids
,
'decoder_attention_mask'
:
decoder_attention_mask
,
'encoder_attention_mask'
:
encoder_attention_mask
}
return
config
,
inputs_dict
def
setUp
(
self
):
self
.
model_tester
=
T5ModelTest
.
T5ModelTester
(
self
)
self
.
config_tester
=
ConfigTester
(
self
,
config_class
=
T5Config
,
d_model
=
37
)
def
test_config
(
self
):
self
.
config_tester
.
run_common_tests
()
def
test_t5_model
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_model
(
*
config_and_inputs
)
def
test_with_lm_head
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_with_lm_head
(
*
config_and_inputs
)
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
T5_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
T5Model
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
unittest
.
main
()
transformers/tests/modeling_tf_common_test.py
View file @
a52d56c8
...
@@ -69,6 +69,7 @@ class TFCommonTestCases:
...
@@ -69,6 +69,7 @@ class TFCommonTestCases:
test_torchscript
=
True
test_torchscript
=
True
test_pruning
=
True
test_pruning
=
True
test_resize_embeddings
=
True
test_resize_embeddings
=
True
is_encoder_decoder
=
False
def
test_initialization
(
self
):
def
test_initialization
(
self
):
pass
pass
...
@@ -129,8 +130,12 @@ class TFCommonTestCases:
...
@@ -129,8 +130,12 @@ class TFCommonTestCases:
for
name
,
key
in
inputs_dict
.
items
())
for
name
,
key
in
inputs_dict
.
items
())
with
torch
.
no_grad
():
with
torch
.
no_grad
():
pto
=
pt_model
(
**
pt_inputs_dict
)
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
tfo
=
tf_model
(
inputs_dict
,
training
=
False
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
tf_hidden_states
=
tfo
[
0
].
numpy
()
pt_hidden_states
=
pto
[
0
].
numpy
()
tf_hidden_states
[
np
.
isnan
(
tf_hidden_states
)]
=
0
pt_hidden_states
[
np
.
isnan
(
pt_hidden_states
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
tf_hidden_states
-
pt_hidden_states
))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
self
.
assertLessEqual
(
max_diff
,
2e-2
)
# Check we can load pt model in tf and vice-versa with checkpoint => model functions
# Check we can load pt model in tf and vice-versa with checkpoint => model functions
...
@@ -150,13 +155,21 @@ class TFCommonTestCases:
                with torch.no_grad():
                    pto = pt_model(**pt_inputs_dict)
                tfo = tf_model(inputs_dict)
-               max_diff = np.amax(np.abs(tfo[0].numpy() - pto[0].numpy()))
+               tfo = tfo[0].numpy()
+               pto = pto[0].numpy()
+               tfo[np.isnan(tfo)] = 0
+               pto[np.isnan(pto)] = 0
+               max_diff = np.amax(np.abs(tfo - pto))
                self.assertLessEqual(max_diff, 2e-2)

        def test_compile_tf_model(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

-           input_ids = tf.keras.Input(batch_shape=(2, 2000), name='input_ids', dtype='int32')
+           if self.is_encoder_decoder:
+               input_ids = {'decoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='decoder_input_ids', dtype='int32'),
+                            'encoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='encoder_input_ids', dtype='int32')}
+           else:
+               input_ids = tf.keras.Input(batch_shape=(2, 2000), name='input_ids', dtype='int32')
            optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
            metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
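test_compile_tf_model now builds a dict of symbolic Keras inputs when the tester is an encoder-decoder model. A minimal sketch of the same Keras pattern, with a toy functional model standing in for the transformer (the toy Embedding/Dense layers are purely illustrative, not the library's architecture):

import tensorflow as tf

# Two symbolic inputs, keyed the way the test keys them for encoder-decoder models.
inputs = {'encoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='encoder_input_ids', dtype='int32'),
          'decoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='decoder_input_ids', dtype='int32')}

# Toy stand-in for the transformer body: embed the decoder ids and project to logits.
x = tf.keras.layers.Embedding(100, 16)(inputs['decoder_input_ids'])
logits = tf.keras.layers.Dense(100)(x)

model = tf.keras.Model(inputs=inputs, outputs=logits)
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])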
...
@@ -189,7 +202,7 @@ class TFCommonTestCases:
                outputs_dict = model(inputs_dict)

                inputs_keywords = copy.deepcopy(inputs_dict)
-               input_ids = inputs_keywords.pop('input_ids')
+               input_ids = inputs_keywords.pop('input_ids' if not self.is_encoder_decoder else 'decoder_input_ids', None)
                outputs_keywords = model(input_ids, **inputs_keywords)

                output_dict = outputs_dict[0].numpy()
...
@@ -200,6 +213,11 @@ class TFCommonTestCases:
        def test_attention_outputs(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+           decoder_seq_length = self.model_tester.decoder_seq_length if hasattr(self.model_tester, 'decoder_seq_length') else self.model_tester.seq_length
+           encoder_seq_length = self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length
+           decoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else decoder_seq_length
+           encoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else encoder_seq_length

            for model_class in self.all_model_classes:
                config.output_attentions = True
                config.output_hidden_states = False
...
@@ -212,16 +230,28 @@ class TFCommonTestCases:
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads,
-                    self.model_tester.seq_length,
-                    self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
+                    encoder_seq_length,
+                    encoder_key_length])
                out_len = len(outputs)

+               if self.is_encoder_decoder:
+                   self.assertEqual(out_len % 2, 0)
+                   decoder_attentions = outputs[(out_len // 2) - 1]
+                   self.assertEqual(model.config.output_attentions, True)
+                   self.assertEqual(model.config.output_hidden_states, False)
+                   self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
+                   self.assertListEqual(
+                       list(decoder_attentions[0].shape[-3:]),
+                       [self.model_tester.num_attention_heads,
+                        decoder_seq_length,
+                        decoder_key_length])
+
                # Check attention is always last and order is fine
                config.output_attentions = True
                config.output_hidden_states = True
                model = model_class(config)
                outputs = model(inputs_dict)
-               self.assertEqual(out_len + 1, len(outputs))
+               self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
                self.assertEqual(model.config.output_attentions, True)
                self.assertEqual(model.config.output_hidden_states, True)
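The new encoder-decoder branch leans on an output-ordering convention that this check appears to rely on: with attentions enabled, the returned tuple splits into two equal halves (one per stack), attentions are the last entry of each half, and the decoder self-attentions are read from the end of the first half, hence the index (out_len // 2) - 1. A tiny illustration with placeholder values standing in for real tensors:

# Placeholder tuple standing in for an encoder-decoder model's outputs with output_attentions=True.
outputs = ("decoder_hidden", "decoder_attentions", "encoder_hidden", "encoder_attentions")
out_len = len(outputs)
assert out_len % 2 == 0
decoder_attentions = outputs[(out_len // 2) - 1]  # last item of the first half
encoder_attentions = outputs[-1]                  # last item of the second half
assert decoder_attentions == "decoder_attentions"
assert encoder_attentions == "encoder_attentions"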
...
@@ -230,8 +260,8 @@ class TFCommonTestCases:
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads,
-                    self.model_tester.seq_length,
-                    self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
+                    encoder_seq_length,
+                    encoder_key_length])

        def test_hidden_states_output(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
@@ -264,35 +294,53 @@ class TFCommonTestCases:
            for model_class in self.all_model_classes:
                model = model_class(config)
                first, second = model(inputs_dict, training=False)[0], model(inputs_dict, training=False)[0]
-               self.assertTrue(tf.math.equal(first, second).numpy().all())
+               out_1 = first.numpy()
+               out_2 = second.numpy()
+               out_1 = out_1[~np.isnan(out_1)]
+               out_2 = out_2[~np.isnan(out_2)]
+               max_diff = np.amax(np.abs(out_1 - out_2))
+               self.assertLessEqual(max_diff, 1e-5)
+
+       def _get_embeds(self, wte, input_ids):
+           # ^^ In our TF models, the input_embeddings can take slightly different forms,
+           # so we try a few of them.
+           # We used to fall back to just synthetically creating a dummy tensor of ones:
+           try:
+               x = wte(input_ids, mode="embedding")
+           except:
+               try:
+                   x = wte([input_ids], mode="embedding")
+               except:
+                   try:
+                       x = wte([input_ids, None, None, None], mode="embedding")
+                   except:
+                       if hasattr(self.model_tester, "embedding_size"):
+                           x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32)
+                       else:
+                           x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32)
+           return x

        def test_inputs_embeds(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-           input_ids = inputs_dict["input_ids"]
-           del inputs_dict["input_ids"]
+           if not self.is_encoder_decoder:
+               input_ids = inputs_dict["input_ids"]
+               del inputs_dict["input_ids"]
+           else:
+               encoder_input_ids = inputs_dict["encoder_input_ids"]
+               decoder_input_ids = inputs_dict["decoder_input_ids"]
+               del inputs_dict["encoder_input_ids"]
+               del inputs_dict["decoder_input_ids"]

            for model_class in self.all_model_classes:
                model = model_class(config)
                wte = model.get_input_embeddings()
-               try:
-                   x = wte(input_ids, mode="embedding")
-               except:
-                   try:
-                       x = wte([input_ids], mode="embedding")
-                   except:
-                       try:
-                           x = wte([input_ids, None, None, None], mode="embedding")
-                       except:
-                           if hasattr(self.model_tester, "embedding_size"):
-                               x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32)
-                           else:
-                               x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32)
-               # ^^ In our TF models, the input_embeddings can take slightly different forms,
-               # so we try a few of them.
-               # We used to fall back to just synthetically creating a dummy tensor of ones:
-               #
-               inputs_dict["inputs_embeds"] = x
+               if not self.is_encoder_decoder:
+                   inputs_dict["inputs_embeds"] = self._get_embeds(wte, input_ids)
+               else:
+                   inputs_dict["encoder_inputs_embeds"] = self._get_embeds(wte, encoder_input_ids)
+                   inputs_dict["decoder_inputs_embeds"] = self._get_embeds(wte, decoder_input_ids)
                outputs = model(inputs_dict)
...
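To show what test_inputs_embeds feeds an encoder-decoder model, here is a toy sketch of precomputing embeddings and passing them under the new encoder_inputs_embeds / decoder_inputs_embeds keys. The tf.keras.layers.Embedding layer is only a stand-in for whatever model.get_input_embeddings() returns (which is exactly why _get_embeds above has to try several call forms), and the shapes are arbitrary:

import tensorflow as tf

# Stand-in for model.get_input_embeddings(); dimensions are arbitrary.
embedding = tf.keras.layers.Embedding(input_dim=99, output_dim=32)

encoder_input_ids = tf.constant([[5, 6, 7]], dtype=tf.int32)
decoder_input_ids = tf.constant([[1, 2, 3]], dtype=tf.int32)

inputs_dict = {
    'encoder_inputs_embeds': embedding(encoder_input_ids),  # shape (1, 3, 32)
    'decoder_inputs_embeds': embedding(decoder_input_ids),  # shape (1, 3, 32)
}
# The test then simply calls the model on this dict: outputs = model(inputs_dict)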
transformers/tests/modeling_tf_t5_test.py
0 → 100644
View file @ a52d56c8
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
import shutil
import sys

from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester
from .utils import require_tf, slow

from transformers import T5Config, is_tf_available

if is_tf_available():
    import tensorflow as tf
    from transformers.modeling_tf_t5 import (TFT5Model, TFT5WithLMHeadModel,
                                             TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP)


@require_tf
class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):

    is_encoder_decoder = True
    all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else ()

    class TFT5ModelTester(object):

        def __init__(self,
                     parent,
                     batch_size=13,
                     seq_length=7,
                     is_training=True,
                     use_input_mask=True,
                     use_labels=True,
                     vocab_size=99,
                     n_positions=14,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     d_ff=37,
                     relative_attention_num_buckets=8,
                     dropout_rate=0.1,
                     initializer_factor=0.002,
                     scope=None,
                     ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_labels = None
            if self.use_labels:
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            config = T5Config(
                vocab_size_or_config_json_file=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor)

            return (config, input_ids, input_mask, token_labels)

        def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
            model = TFT5Model(config=config)
            inputs = {'encoder_input_ids': input_ids,
                      'decoder_input_ids': input_ids,
                      'decoder_attention_mask': input_mask}
            encoder_output, decoder_output = model(inputs)

            encoder_output, decoder_output = model(input_ids,
                                                   decoder_attention_mask=input_mask,
                                                   encoder_input_ids=input_ids)

            result = {
                "encoder_output": encoder_output.numpy(),
                "decoder_output": decoder_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["encoder_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(
                list(result["decoder_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])

        def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
            model = TFT5WithLMHeadModel(config=config)
            inputs = {'encoder_input_ids': input_ids,
                      'decoder_input_ids': input_ids,
                      'decoder_attention_mask': input_mask}
            prediction_scores, decoder_output = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, input_mask, token_labels) = config_and_inputs
            inputs_dict = {'encoder_input_ids': input_ids,
                           'decoder_input_ids': input_ids,
                           'decoder_attention_mask': input_mask}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_t5_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*config_and_inputs)

    def test_with_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/transformers_test/"
        for model_name in ['t5-small']:
            model = TFT5Model.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
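For reference, a minimal sketch of exercising TFT5WithLMHeadModel the way TFT5ModelTester does, with the same tiny configuration; the token ids are arbitrary and the model is randomly initialized, so the scores are only meaningful for shape checks:

import tensorflow as tf
from transformers import T5Config
from transformers.modeling_tf_t5 import TFT5WithLMHeadModel

# Same tiny configuration as the tester above.
config = T5Config(vocab_size_or_config_json_file=99, n_positions=14, d_model=32, d_ff=37,
                  d_kv=32 // 4, num_layers=5, num_heads=4,
                  relative_attention_num_buckets=8, dropout_rate=0.1,
                  initializer_factor=0.002)
model = TFT5WithLMHeadModel(config)

input_ids = tf.constant([[3, 14, 15, 92, 65, 35, 89]], dtype=tf.int32)  # (1, 7), arbitrary ids
inputs = {'encoder_input_ids': input_ids,
          'decoder_input_ids': input_ids,
          'decoder_attention_mask': tf.ones_like(input_ids)}

prediction_scores, decoder_output = model(inputs)
# prediction_scores: (1, 7, 99), as asserted by create_and_check_t5_with_lm_head above.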
transformers/tests/modeling_tf_transfo_xl_test.py
View file @ a52d56c8
...
@@ -67,7 +67,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
-           self.key_len = seq_length + mem_len
+           self.key_length = seq_length + mem_len
            self.clamp_len = clamp_len
            self.is_training = is_training
            self.use_labels = use_labels
...
transformers/tests/modeling_transfo_xl_test.py
View file @ a52d56c8
...
@@ -66,7 +66,7 @@ class TransfoXLModelTest(CommonTestCases.CommonModelTester):
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
-           self.key_len = seq_length + mem_len
+           self.key_length = seq_length + mem_len
            self.clamp_len = clamp_len
            self.is_training = is_training
            self.use_labels = use_labels
...
transformers/tests/tokenization_bert_test.py
View file @ a52d56c8
...
@@ -139,5 +139,6 @@ class BertTokenizationTest(CommonTestCases.CommonTokenizerTester):
        assert encoded_sentence == [101] + text + [102]
        assert encoded_pair == [101] + text + [102] + text_2 + [102]

if __name__ == '__main__':
    unittest.main()