Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
0537139b
Commit
0537139b
authored
Sep 09, 2019
by
thomwolf
Browse files
removing tf.function
parent
33cb00f4
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
0 additions
and
36 deletions
+0
-36
pytorch_transformers/modeling_tf_bert.py
pytorch_transformers/modeling_tf_bert.py
+0
-22
pytorch_transformers/modeling_tf_gpt2.py
pytorch_transformers/modeling_tf_gpt2.py
+0
-12
pytorch_transformers/modeling_tf_utils.py
pytorch_transformers/modeling_tf_utils.py
+0
-2
No files found.
pytorch_transformers/modeling_tf_bert.py
View file @
0537139b
...
...
@@ -164,7 +164,6 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
mean
=
0.
,
stddev
=
self
.
hidden_size
**-
0.5
))
super
(
TFBertEmbeddings
,
self
).
build
(
input_shape
)
# @tf.function
def
call
(
self
,
inputs
,
mode
=
"embedding"
,
training
=
False
):
"""Get token embeddings of inputs.
Args:
...
...
@@ -248,7 +247,6 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
x
=
tf
.
reshape
(
x
,
(
batch_size
,
-
1
,
self
.
num_attention_heads
,
self
.
attention_head_size
))
return
tf
.
transpose
(
x
,
perm
=
[
0
,
2
,
1
,
3
])
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -297,7 +295,6 @@ class TFBertSelfOutput(tf.keras.layers.Layer):
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
input_tensor
=
inputs
...
...
@@ -317,7 +314,6 @@ class TFBertAttention(tf.keras.layers.Layer):
def
prune_heads
(
self
,
heads
):
raise
NotImplementedError
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
input_tensor
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -336,7 +332,6 @@ class TFBertIntermediate(tf.keras.layers.Layer):
else
:
self
.
intermediate_act_fn
=
config
.
hidden_act
# @tf.function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
intermediate_act_fn
(
hidden_states
)
...
...
@@ -350,7 +345,6 @@ class TFBertOutput(tf.keras.layers.Layer):
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
input_tensor
=
inputs
...
...
@@ -368,7 +362,6 @@ class TFBertLayer(tf.keras.layers.Layer):
self
.
intermediate
=
TFBertIntermediate
(
config
,
name
=
'intermediate'
)
self
.
bert_output
=
TFBertOutput
(
config
,
name
=
'output'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -387,7 +380,6 @@ class TFBertEncoder(tf.keras.layers.Layer):
self
.
output_hidden_states
=
config
.
output_hidden_states
self
.
layer
=
[
TFBertLayer
(
config
,
name
=
'layer_{}'
.
format
(
i
))
for
i
in
range
(
config
.
num_hidden_layers
)]
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
hidden_states
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -420,7 +412,6 @@ class TFBertPooler(tf.keras.layers.Layer):
super
(
TFBertPooler
,
self
).
__init__
(
**
kwargs
)
self
.
dense
=
tf
.
keras
.
layers
.
Dense
(
config
.
hidden_size
,
activation
=
'tanh'
,
name
=
'dense'
)
# @tf.function
def
call
(
self
,
hidden_states
):
# We "pool" the model by simply taking the hidden state corresponding
# to the first token.
...
...
@@ -439,7 +430,6 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
self
.
transform_act_fn
=
config
.
hidden_act
self
.
LayerNorm
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_eps
,
name
=
'LayerNorm'
)
# @tf.function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
transform_act_fn
(
hidden_states
)
...
...
@@ -463,7 +453,6 @@ class TFBertLMPredictionHead(tf.keras.layers.Layer):
trainable
=
True
,
name
=
'bias'
)
# @tf.function
def
call
(
self
,
hidden_states
):
hidden_states
=
self
.
transform
(
hidden_states
)
hidden_states
=
self
.
decoder
(
hidden_states
)
+
self
.
bias
...
...
@@ -475,7 +464,6 @@ class TFBertMLMHead(tf.keras.layers.Layer):
super
(
TFBertMLMHead
,
self
).
__init__
(
**
kwargs
)
self
.
predictions
=
TFBertLMPredictionHead
(
config
,
name
=
'predictions'
)
# @tf.function
def
call
(
self
,
sequence_output
):
prediction_scores
=
self
.
predictions
(
sequence_output
)
return
prediction_scores
...
...
@@ -486,7 +474,6 @@ class TFBertNSPHead(tf.keras.layers.Layer):
super
(
TFBertNSPHead
,
self
).
__init__
(
**
kwargs
)
self
.
seq_relationship
=
tf
.
keras
.
layers
.
Dense
(
2
,
name
=
'seq_relationship'
)
# @tf.function
def
call
(
self
,
pooled_output
):
seq_relationship_score
=
self
.
seq_relationship
(
pooled_output
)
return
seq_relationship_score
...
...
@@ -511,7 +498,6 @@ class TFBertMainLayer(tf.keras.layers.Layer):
"""
raise
NotImplementedError
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
if
not
isinstance
(
inputs
,
(
dict
,
tuple
,
list
)):
input_ids
=
inputs
...
...
@@ -693,7 +679,6 @@ class TFBertModel(TFBertPreTrainedModel):
super
(
TFBertModel
,
self
).
__init__
(
config
,
*
inputs
,
**
kwargs
)
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
return
outputs
...
...
@@ -732,7 +717,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
self
.
cls_nsp
=
TFBertNSPHead
(
config
,
name
=
'cls_nsp'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -774,7 +758,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -818,7 +801,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
self
.
cls_nsp
=
TFBertNSPHead
(
config
,
name
=
'cls_nsp'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -863,7 +845,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel):
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
self
.
classifier
=
tf
.
keras
.
layers
.
Dense
(
config
.
num_labels
,
name
=
'classifier'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -912,7 +893,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
self
.
classifier
=
tf
.
keras
.
layers
.
Dense
(
1
,
name
=
'classifier'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
if
not
isinstance
(
inputs
,
(
dict
,
tuple
,
list
)):
input_ids
=
inputs
...
...
@@ -989,7 +969,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel):
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
hidden_dropout_prob
)
self
.
classifier
=
tf
.
keras
.
layers
.
Dense
(
config
.
num_labels
,
name
=
'classifier'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
@@ -1040,7 +1019,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel):
self
.
bert
=
TFBertMainLayer
(
config
,
name
=
'bert'
)
self
.
qa_outputs
=
tf
.
keras
.
layers
.
Dense
(
config
.
num_labels
,
name
=
'qa_outputs'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
bert
(
inputs
,
training
=
training
)
...
...
pytorch_transformers/modeling_tf_gpt2.py
View file @
0537139b
...
...
@@ -143,7 +143,6 @@ class TFAttention(tf.keras.layers.Layer):
pass
@
staticmethod
# @tf.function
def
causal_attention_mask
(
nd
,
ns
,
dtype
):
"""1's in the lower triangle, counting from the lower right corner.
Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't produce garbage on TPUs.
...
...
@@ -153,7 +152,6 @@ class TFAttention(tf.keras.layers.Layer):
m
=
i
>=
j
-
ns
+
nd
return
tf
.
cast
(
m
,
dtype
)
# @tf.function
def
_attn
(
self
,
inputs
,
training
=
False
):
q
,
k
,
v
,
attention_mask
,
head_mask
=
inputs
# q, k, v have shape [batch, heads, sequence, features]
...
...
@@ -185,21 +183,18 @@ class TFAttention(tf.keras.layers.Layer):
outputs
.
append
(
w
)
return
outputs
# @tf.function
def
merge_heads
(
self
,
x
):
x
=
tf
.
transpose
(
x
,
[
0
,
2
,
1
,
3
])
x_shape
=
shape_list
(
x
)
new_x_shape
=
x_shape
[:
-
2
]
+
[
x_shape
[
-
2
]
*
x_shape
[
-
1
]]
return
tf
.
reshape
(
x
,
new_x_shape
)
# @tf.function
def
split_heads
(
self
,
x
):
x_shape
=
shape_list
(
x
)
new_x_shape
=
x_shape
[:
-
1
]
+
[
self
.
n_head
,
x_shape
[
-
1
]
//
self
.
n_head
]
x
=
tf
.
reshape
(
x
,
new_x_shape
)
return
tf
.
transpose
(
x
,
(
0
,
2
,
1
,
3
))
# (batch, head, seq_length, head_features)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
x
,
layer_past
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -235,7 +230,6 @@ class TFMLP(tf.keras.layers.Layer):
self
.
act
=
gelu
self
.
dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
resid_pdrop
)
# @tf.function
def
call
(
self
,
x
,
training
=
False
):
h
=
self
.
act
(
self
.
c_fc
(
x
))
h2
=
self
.
c_proj
(
h
)
...
...
@@ -253,7 +247,6 @@ class TFBlock(tf.keras.layers.Layer):
self
.
ln_2
=
tf
.
keras
.
layers
.
LayerNormalization
(
epsilon
=
config
.
layer_norm_epsilon
,
name
=
'ln_2'
)
self
.
mlp
=
TFMLP
(
4
*
nx
,
config
,
name
=
'mlp'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
x
,
layer_past
,
attention_mask
,
head_mask
=
inputs
...
...
@@ -289,7 +282,6 @@ class TFGPT2Embeddings(tf.keras.layers.Layer):
mean
=
0.
,
stddev
=
self
.
hidden_size
**-
0.5
))
super
(
TFGPT2Embeddings
,
self
).
build
(
input_shape
)
# @tf.function
def
call
(
self
,
inputs
,
mode
=
"embedding"
):
"""Get token embeddings of inputs.
Args:
...
...
@@ -354,7 +346,6 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
"""
raise
NotImplementedError
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
if
not
isinstance
(
inputs
,
(
dict
,
tuple
,
list
)):
input_ids
=
inputs
...
...
@@ -568,7 +559,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
super
(
TFGPT2Model
,
self
).
__init__
(
config
,
*
inputs
,
**
kwargs
)
self
.
transformer
=
TFGPT2MainLayer
(
config
,
name
=
'transformer'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
outputs
=
self
.
transformer
(
inputs
,
training
=
training
)
return
outputs
...
...
@@ -610,7 +600,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
super
(
TFGPT2LMHeadModel
,
self
).
__init__
(
config
,
*
inputs
,
**
kwargs
)
self
.
transformer
=
TFGPT2MainLayer
(
config
,
name
=
'transformer'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
transformer_outputs
=
self
.
transformer
(
inputs
,
training
=
training
)
hidden_states
=
transformer_outputs
[
0
]
...
...
@@ -680,7 +669,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
self
.
multiple_choice_head
=
TFSequenceSummary
(
config
,
name
=
'multiple_choice_head'
)
# @tf.function
def
call
(
self
,
inputs
,
training
=
False
):
if
not
isinstance
(
inputs
,
(
dict
,
tuple
,
list
)):
input_ids
=
inputs
...
...
pytorch_transformers/modeling_tf_utils.py
View file @
0537139b
...
...
@@ -277,7 +277,6 @@ class TFConv1D(tf.keras.layers.Layer):
shape
=
[
1
,
self
.
nf
],
initializer
=
tf
.
zeros_initializer
())
@
tf
.
function
def
call
(
self
,
x
):
bz
,
sl
=
shape_list
(
x
)[:
2
]
...
...
@@ -334,7 +333,6 @@ class TFSequenceSummary(tf.keras.layers.Layer):
if
hasattr
(
config
,
'summary_last_dropout'
)
and
config
.
summary_last_dropout
>
0
:
self
.
last_dropout
=
tf
.
keras
.
layers
.
Dropout
(
config
.
summary_last_dropout
)
@
tf
.
function
def
call
(
self
,
inputs
,
training
=
False
):
""" hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
cls_index: [optional] position of the classification token if summary_type == 'cls_index',
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment