chenpangpang / transformers / Commits
Commit 31b0560a
Authored Feb 15, 2021 by Julien Plu, committed via GitHub on Feb 15, 2021

Add AMP for Albert (#10141)

parent 6fc940ed
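Here "AMP" refers to running the TensorFlow models under Keras mixed precision (float16 compute with float32 variables). As background only, and not part of the commit, a minimal sketch of how that policy is typically enabled (assuming TF >= 2.4):

    import tensorflow as tf

    # Compute in float16 while keeping variables in float32.
    tf.keras.mixed_precision.set_global_policy("mixed_float16")

    # Layers and models built after this point pick up the mixed policy,
    # which is what the dtype-aware casts in the diffs below accommodate.
    print(tf.keras.mixed_precision.global_policy())  # mixed_float16 policy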
Showing 8 changed files with 415 additions and 345 deletions:

  src/transformers/models/albert/modeling_tf_albert.py    +375  -309
  src/transformers/models/bert/modeling_tf_bert.py    +7  -7
  src/transformers/models/convbert/modeling_tf_convbert.py    +5  -5
  src/transformers/models/electra/modeling_tf_electra.py    +5  -6
  src/transformers/models/longformer/modeling_tf_longformer.py    +3  -3
  src/transformers/models/roberta/modeling_tf_roberta.py    +4  -5
  templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py    +5  -6
  tests/test_modeling_tf_albert.py    +11  -4
src/transformers/models/albert/modeling_tf_albert.py

This diff is collapsed.
src/transformers/models/bert/modeling_tf_bert.py
@@ -148,21 +148,21 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
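For reference, and stated as an assumption about the helper rather than something shown in the diff: get_initializer in the library builds a truncated-normal initializer, so dropping the initializer_range keyword in favor of the positional argument does not change behavior. A minimal sketch of that equivalence:

    import tensorflow as tf

    def get_initializer(initializer_range: float = 0.02) -> tf.keras.initializers.TruncatedNormal:
        # Sketch of the helper: a TruncatedNormal initializer with the given stddev.
        return tf.keras.initializers.TruncatedNormal(stddev=initializer_range)

    # Keyword and positional calls build the same initializer, which is why the
    # hunks above can switch from one form to the other.
    a = get_initializer(initializer_range=0.02)
    b = get_initializer(0.02)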
@@ -253,8 +253,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
         key_layer = self.transpose_for_scores(mixed_key_layer, batch_size)
         value_layer = self.transpose_for_scores(mixed_value_layer, batch_size)
 
-        # Take the dot product between "query" and "key" to get the raw
-        # attention scores.
+        # Take the dot product between "query" and "key" to get the raw attention scores.
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         dk = tf.cast(self.sqrt_att_head_size, dtype=attention_scores.dtype)
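A note on the unchanged context line above: casting the scaling factor to attention_scores.dtype keeps the scaled dot-product step dtype-consistent when the model runs in float16 under mixed precision. A self-contained sketch with made-up shapes, not code from the diff:

    import math
    import tensorflow as tf

    attention_head_size = 64  # hypothetical head size
    query_layer = tf.random.normal((1, 12, 8, attention_head_size), dtype=tf.float16)
    key_layer = tf.random.normal((1, 12, 8, attention_head_size), dtype=tf.float16)

    attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
    # Match the scores' dtype (float16 here) before dividing, so the op does not upcast.
    dk = tf.cast(math.sqrt(attention_head_size), dtype=attention_scores.dtype)
    attention_scores = attention_scores / dk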
@@ -1009,7 +1008,8 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
             total_loss = self.compute_loss(labels=d_labels, logits=(prediction_scores, seq_relationship_score))
 
         if not inputs["return_dict"]:
-            return (prediction_scores, seq_relationship_score) + outputs[2:]
+            output = (prediction_scores, seq_relationship_score) + outputs[2:]
+            return ((total_loss,) + output) if total_loss is not None else output
 
         return TFBertForPreTrainingOutput(
             loss=total_loss,
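The rewritten branch keeps the tuple return convention used when return_dict=False, but now prepends the loss when labels were provided. A small illustration of the packing pattern in isolation, with placeholder values rather than model outputs:

    from typing import Optional, Tuple

    def pack_outputs(prediction_scores, seq_relationship_score, extra, total_loss: Optional[float]) -> Tuple:
        # Build the tuple of model outputs first, then prepend the loss only if one was computed.
        output = (prediction_scores, seq_relationship_score) + tuple(extra)
        return ((total_loss,) + output) if total_loss is not None else output

    print(pack_outputs("scores", "sop", [], None))  # ('scores', 'sop')
    print(pack_outputs("scores", "sop", [], 0.37))  # (0.37, 'scores', 'sop')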
@@ -1598,7 +1598,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss):
             }
         ]
     )
-    def serving(self, inputs: Dict[str, tf.Tensor]):
+    def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
         output = self.call(input_ids=inputs)
 
         return self.serving_output(output)
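serving is the tf.function used when the model is exported as a SavedModel; this hunk only adds a return annotation. As a hedged illustration of how such a signature can be exported (the checkpoint name and path are placeholders, not taken from the commit):

    import tensorflow as tf
    from transformers import TFBertForMultipleChoice

    model = TFBertForMultipleChoice.from_pretrained("bert-base-uncased")
    # model.serving carries a fixed input_signature, so it can serve directly
    # as the SavedModel's serving signature.
    tf.saved_model.save(model, "/tmp/tf_bert_mc", signatures=model.serving)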
src/transformers/models/convbert/modeling_tf_convbert.py
@@ -62,11 +62,11 @@ TF_CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
 ]
 
 
-# Copied from transformers.models.albert.modeling_tf_albert.TFAlbertEmbeddings
+# Copied from transformers.models.albert.modeling_tf_albert.TFAlbertEmbeddings with Albert->ConvBert
 class TFConvBertEmbeddings(tf.keras.layers.Layer):
     """Construct the embeddings from word, position and token_type embeddings."""
 
-    def __init__(self, config, **kwargs):
+    def __init__(self, config: ConvBertConfig, **kwargs):
         super().__init__(**kwargs)
 
         self.vocab_size = config.vocab_size
@@ -83,21 +83,21 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
src/transformers/models/electra/modeling_tf_electra.py
@@ -121,8 +121,7 @@ class TFElectraSelfAttention(tf.keras.layers.Layer):
         key_layer = self.transpose_for_scores(mixed_key_layer, batch_size)
         value_layer = self.transpose_for_scores(mixed_value_layer, batch_size)
 
-        # Take the dot product between "query" and "key" to get the raw
-        # attention scores.
+        # Take the dot product between "query" and "key" to get the raw attention scores.
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         dk = tf.cast(self.sqrt_att_head_size, dtype=attention_scores.dtype)
@@ -353,7 +352,7 @@ class TFElectraPooler(tf.keras.layers.Layer):
 class TFElectraEmbeddings(tf.keras.layers.Layer):
     """Construct the embeddings from word, position and token_type embeddings."""
 
-    def __init__(self, config, **kwargs):
+    def __init__(self, config: ElectraConfig, **kwargs):
         super().__init__(**kwargs)
 
         self.vocab_size = config.vocab_size
@@ -370,21 +369,21 @@ class TFElectraEmbeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.embedding_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
src/transformers/models/longformer/modeling_tf_longformer.py
@@ -491,21 +491,21 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
src/transformers/models/roberta/modeling_tf_roberta.py
@@ -92,21 +92,21 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
            )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
@@ -232,8 +232,7 @@ class TFRobertaSelfAttention(tf.keras.layers.Layer):
         key_layer = self.transpose_for_scores(mixed_key_layer, batch_size)
         value_layer = self.transpose_for_scores(mixed_value_layer, batch_size)
 
-        # Take the dot product between "query" and "key" to get the raw
-        # attention scores.
+        # Take the dot product between "query" and "key" to get the raw attention scores.
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         dk = tf.cast(self.sqrt_att_head_size, dtype=attention_scores.dtype)
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py
@@ -90,21 +90,21 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):
             self.weight = self.add_weight(
                 name="weight",
                 shape=[self.vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("token_type_embeddings"):
             self.token_type_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.type_vocab_size, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         with tf.name_scope("position_embeddings"):
             self.position_embeddings = self.add_weight(
                 name="embeddings",
                 shape=[self.max_position_embeddings, self.hidden_size],
-                initializer=get_initializer(initializer_range=self.initializer_range),
+                initializer=get_initializer(self.initializer_range),
             )
 
         super().build(input_shape)
@@ -197,8 +197,7 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
         key_layer = self.transpose_for_scores(mixed_key_layer, batch_size)
         value_layer = self.transpose_for_scores(mixed_value_layer, batch_size)
 
-        # Take the dot product between "query" and "key" to get the raw
-        # attention scores.
+        # Take the dot product between "query" and "key" to get the raw attention scores.
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         dk = tf.cast(self.sqrt_att_head_size, dtype=attention_scores.dtype)
@@ -1247,7 +1246,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c
             "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
         }])
     # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving
-    def serving(self, inputs: Dict[str, tf.Tensor]):
+    def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
         output = self.call(input_ids=inputs)
 
         return self.serving_output(output)
tests/test_modeling_tf_albert.py
@@ -26,6 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 if is_tf_available():
     import tensorflow as tf
 
+    from transformers import TF_MODEL_FOR_PRETRAINING_MAPPING
     from transformers.models.albert.modeling_tf_albert import (
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
         TFAlbertForMaskedLM,
@@ -243,6 +244,16 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
     test_head_masking = False
     test_onnx = False
 
+    # special case for ForPreTraining model
+    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+        if return_labels:
+            if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values():
+                inputs_dict["sentence_order_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
+
+        return inputs_dict
+
     def setUp(self):
         self.model_tester = TFAlbertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
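Background for this helper, stated as an assumption rather than quoted from the diff: TFAlbertForPreTraining combines a masked-LM head with a sentence-order-prediction head, so when the shared tests request labels the inputs also need a sentence_order_label. A sketch of such an inputs dict with made-up sizes:

    import tensorflow as tf

    batch_size, seq_len = 2, 8
    inputs_dict = {
        "input_ids": tf.ones((batch_size, seq_len), dtype=tf.int32),
        "labels": tf.zeros((batch_size, seq_len), dtype=tf.int32),     # masked-LM labels
        "sentence_order_label": tf.zeros(batch_size, dtype=tf.int32),  # one SOP label per example
    }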
@@ -295,10 +306,6 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
         name = model.get_bias()
         assert name is None
 
-    def test_mixed_precision(self):
-        # TODO JP: Make ALBERT float16 compliant
-        pass
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
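Deleting this override means ALBERT now runs the shared mixed-precision test instead of skipping it. As an illustration of what such a smoke test typically does (a sketch with placeholder config values, not the repository's test; assumes TF >= 2.4):

    import tensorflow as tf
    from transformers import AlbertConfig, TFAlbertModel

    tf.keras.mixed_precision.set_global_policy("mixed_float16")
    try:
        config = AlbertConfig(vocab_size=99, hidden_size=32, num_hidden_layers=2,
                              num_attention_heads=4, intermediate_size=37)
        model = TFAlbertModel(config)
        outputs = model(tf.ones((1, 8), dtype=tf.int32))
        assert outputs.last_hidden_state.shape == (1, 8, 32)
    finally:
        # Restore the default policy so later code is unaffected.
        tf.keras.mixed_precision.set_global_policy("float32")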