Commit c536c2a4 in chenpangpang/transformers: ALBERT Input Embeds
Authored Nov 26, 2019 by LysandreJik; committed by Lysandre Debut on Nov 26, 2019
Parent: f873b55e
Showing 2 changed files with 91 additions and 33 deletions:

  transformers/modeling_albert.py     +57  -22
  transformers/modeling_tf_albert.py  +34  -11
transformers/modeling_albert.py (+57, -22)

@@ -433,6 +433,12 @@ class AlbertModel(AlbertPreTrainedModel):
         self.init_weights()
 
+    def get_input_embeddings(self):
+        return self.embeddings.word_embeddings
+
+    def set_input_embeddings(self, value):
+        self.embeddings.word_embeddings = value
+
     def _resize_token_embeddings(self, new_num_tokens):
         old_embeddings = self.embeddings.word_embeddings
         new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)

@@ -457,12 +463,24 @@ class AlbertModel(AlbertPreTrainedModel):
             inner_group_idx = int(layer - group_idx * self.config.inner_group_num)
             self.encoder.albert_layer_groups[group_idx].albert_layers[inner_group_idx].attention.prune_heads(heads)
 
-    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None):
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            input_shape = input_ids.size()
+        elif inputs_embeds is not None:
+            input_shape = inputs_embeds.size()[:-1]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+
+        device = input_ids.device if input_ids is not None else inputs_embeds.device
+
         if attention_mask is None:
-            attention_mask = torch.ones_like(input_ids)
+            attention_mask = torch.ones(input_shape, device=device)
         if token_type_ids is None:
-            token_type_ids = torch.zeros_like(input_ids)
+            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)
 
         extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
         extended_attention_mask = extended_attention_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility

@@ -477,7 +495,8 @@ class AlbertModel(AlbertPreTrainedModel):
         else:
             head_mask = [None] * self.config.num_hidden_layers
 
-        embedding_output = self.embeddings(input_ids, position_ids=position_ids, token_type_ids=token_type_ids)
+        embedding_output = self.embeddings(input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds)
         encoder_outputs = self.encoder(embedding_output, extended_attention_mask, head_mask=head_mask)

@@ -549,9 +568,19 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
         self._tie_or_clone_weights(self.predictions.decoder, self.albert.embeddings.word_embeddings)
 
-    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, masked_lm_labels=None):
-        outputs = self.albert(input_ids, attention_mask, token_type_ids, position_ids, head_mask)
+    def get_output_embeddings(self):
+        return self.predictions.decoder
+
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, masked_lm_labels=None, inputs_embeds=None):
+        outputs = self.albert(input_ids=input_ids,
+                              attention_mask=attention_mask,
+                              token_type_ids=token_type_ids,
+                              position_ids=position_ids,
+                              head_mask=head_mask,
+                              inputs_embeds=inputs_embeds)
 
         sequence_outputs = outputs[0]
         prediction_scores = self.predictions(sequence_outputs)

@@ -609,14 +638,17 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
         self.init_weights()
 
-    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, labels=None):
-        outputs = self.albert(input_ids,
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None):
+        outputs = self.albert(input_ids=input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
-                              head_mask=head_mask)
+                              head_mask=head_mask,
+                              inputs_embeds=inputs_embeds)
 
         pooled_output = outputs[1]

@@ -692,14 +724,17 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
         self.init_weights()
 
-    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, start_positions=None, end_positions=None):
-        outputs = self.albert(input_ids,
+    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None, start_positions=None, end_positions=None):
+        outputs = self.albert(input_ids=input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               position_ids=position_ids,
-                              head_mask=head_mask)
+                              head_mask=head_mask,
+                              inputs_embeds=inputs_embeds)
 
         sequence_output = outputs[0]
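For readers of the PyTorch changes above, a minimal usage sketch of the new code path follows (not part of the commit). Only get_input_embeddings() and the inputs_embeds keyword argument come from this diff; the albert-base-v2 checkpoint name, the tokenizer call, and the closing comparison are assumptions added for illustration.

# Minimal sketch (not part of the commit): exercising the new inputs_embeds
# argument on the PyTorch AlbertModel. The 'albert-base-v2' checkpoint name
# is an assumption; any ALBERT checkpoint should behave the same way.
import torch
from transformers import AlbertModel, AlbertTokenizer

tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model = AlbertModel.from_pretrained('albert-base-v2')

input_ids = torch.tensor([tokenizer.encode("Hello, ALBERT!")])

# get_input_embeddings() is added by this commit and returns the word
# embedding module, so the lookup can be done by hand.
inputs_embeds = model.get_input_embeddings()(input_ids)

outputs_from_ids = model(input_ids=input_ids)
outputs_from_embeds = model(inputs_embeds=inputs_embeds)  # new keyword argument

# The two calls should yield the same hidden states (dropout is inactive
# because from_pretrained() returns the model in eval mode).
print(torch.allclose(outputs_from_ids[0], outputs_from_embeds[0], atol=1e-5))

The same tensor flows through AlbertEmbeddings either way; the only difference is whether the word-embedding lookup happens inside or outside the model.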
transformers/modeling_tf_albert.py (+34, -11)

@@ -107,19 +107,25 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
     def _embedding(self, inputs, training=False):
         """Applies embedding based on inputs tensor."""
-        input_ids, position_ids, token_type_ids = inputs
-        seq_length = tf.shape(input_ids)[1]
+        input_ids, position_ids, token_type_ids, inputs_embeds = inputs
+
+        if input_ids is not None:
+            input_shape = tf.shape(input_ids)
+        else:
+            input_shape = tf.shape(inputs_embeds)[:-1]
+
+        seq_length = input_shape[1]
 
         if position_ids is None:
             position_ids = tf.range(seq_length, dtype=tf.int32)[tf.newaxis, :]
         if token_type_ids is None:
-            token_type_ids = tf.fill(tf.shape(input_ids), 0)
+            token_type_ids = tf.fill(input_shape, 0)
 
-        words_embeddings = tf.gather(self.word_embeddings, input_ids)
+        if inputs_embeds is None:
+            inputs_embeds = tf.gather(self.word_embeddings, input_ids)
+
         position_embeddings = self.position_embeddings(position_ids)
         token_type_embeddings = self.token_type_embeddings(token_type_ids)
 
-        embeddings = words_embeddings + position_embeddings + token_type_embeddings
+        embeddings = inputs_embeds + position_embeddings + token_type_embeddings
         embeddings = self.LayerNorm(embeddings)
         embeddings = self.dropout(embeddings, training=training)
         return embeddings

@@ -603,6 +609,9 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         self.pooler = tf.keras.layers.Dense(config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), activation='tanh', name='pooler')
 
+    def get_input_embeddings(self):
+        return self.embeddings
+
     def _resize_token_embeddings(self, new_num_tokens):
         raise NotImplementedError

@@ -613,28 +622,39 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         """
         raise NotImplementedError
 
-    def call(self, inputs, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, training=False):
+    def call(self, inputs, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None, training=False):
         if isinstance(inputs, (tuple, list)):
             input_ids = inputs[0]
             attention_mask = inputs[1] if len(inputs) > 1 else attention_mask
             token_type_ids = inputs[2] if len(inputs) > 2 else token_type_ids
             position_ids = inputs[3] if len(inputs) > 3 else position_ids
             head_mask = inputs[4] if len(inputs) > 4 else head_mask
-            assert len(inputs) <= 5, "Too many inputs."
+            inputs_embeds = inputs[5] if len(inputs) > 5 else inputs_embeds
+            assert len(inputs) <= 6, "Too many inputs."
         elif isinstance(inputs, dict):
             input_ids = inputs.get('input_ids')
             attention_mask = inputs.get('attention_mask', attention_mask)
             token_type_ids = inputs.get('token_type_ids', token_type_ids)
             position_ids = inputs.get('position_ids', position_ids)
             head_mask = inputs.get('head_mask', head_mask)
-            assert len(inputs) <= 5, "Too many inputs."
+            inputs_embeds = inputs.get('inputs_embeds', inputs_embeds)
+            assert len(inputs) <= 6, "Too many inputs."
         else:
             input_ids = inputs
 
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
+        elif input_ids is not None:
+            input_shape = input_ids.shape
+        elif inputs_embeds is not None:
+            input_shape = inputs_embeds.shape[:-1]
+        else:
+            raise ValueError("You have to specify either input_ids or inputs_embeds")
+
         if attention_mask is None:
-            attention_mask = tf.fill(tf.shape(input_ids), 1)
+            attention_mask = tf.fill(input_shape, 1)
         if token_type_ids is None:
-            token_type_ids = tf.fill(tf.shape(input_ids), 0)
+            token_type_ids = tf.fill(input_shape, 0)
 
         # We create a 3D attention mask from a 2D tensor mask.
         # Sizes are [batch_size, 1, 1, to_seq_length]

@@ -664,7 +684,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         # head_mask = tf.constant([0] * self.num_hidden_layers)
 
-        embedding_output = self.embeddings([input_ids, position_ids, token_type_ids], training=training)
+        embedding_output = self.embeddings([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
         encoder_outputs = self.encoder([embedding_output, extended_attention_mask, head_mask], training=training)

@@ -712,6 +732,9 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
         self.predictions = TFAlbertMLMHead(config, self.albert.embeddings, name='predictions')
 
+    def get_output_embeddings(self):
+        return self.albert.embeddings
+
     def call(self, inputs, **kwargs):
         outputs = self.albert(inputs, **kwargs)
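A matching sketch for the TensorFlow changes above (again not part of the commit). The pieces taken from this diff are TFAlbertModel.get_input_embeddings() and the new 'inputs_embeds' entry handled in call(); the checkpoint name, tokenizer usage, and the dict-style invocation are assumptions added for illustration.

# Minimal sketch (not part of the commit): feeding precomputed embeddings to
# TFAlbertModel through the new 'inputs_embeds' input. The 'albert-base-v2'
# checkpoint name is an assumption, not taken from the diff.
import tensorflow as tf
from transformers import TFAlbertModel, AlbertTokenizer

tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model = TFAlbertModel.from_pretrained('albert-base-v2')

input_ids = tf.constant([tokenizer.encode("Hello, ALBERT!")])

# get_input_embeddings() (added by this commit) returns the TFAlbertEmbeddings
# layer; gathering from its word_embeddings weight mirrors what _embedding()
# now does only when inputs_embeds is None.
embedding_layer = model.get_input_embeddings()
inputs_embeds = tf.gather(embedding_layer.word_embeddings, input_ids)

outputs_from_ids = model(input_ids)
outputs_from_embeds = model({'inputs_embeds': inputs_embeds})  # new dict key

# The hidden states from both calls should match (training defaults to False,
# so dropout is inactive).
print(tf.reduce_max(tf.abs(outputs_from_ids[0] - outputs_from_embeds[0])))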