Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
90ca8d36
Unverified
Commit
90ca8d36
authored
Jan 15, 2021
by
Patrick von Platen
Committed by
GitHub
Jan 15, 2021
Browse files
[TF Led] Fix wrong decoder attention mask behavior (#9601)
* fix tf led * remove loop file
parent
85788bae
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
15 deletions
+3
-15
src/transformers/models/led/modeling_tf_led.py
src/transformers/models/led/modeling_tf_led.py
+3
-15
No files found.
src/transformers/models/led/modeling_tf_led.py
View file @
90ca8d36
...
@@ -1862,7 +1862,6 @@ class TFLEDDecoder(tf.keras.layers.Layer):
...
@@ -1862,7 +1862,6 @@ class TFLEDDecoder(tf.keras.layers.Layer):
hidden_states
=
inputs
[
"inputs_embeds"
]
hidden_states
=
inputs
[
"inputs_embeds"
]
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask
=
None
if
input_shape
[
-
1
]
>
1
:
if
input_shape
[
-
1
]
>
1
:
combined_attention_mask
=
_make_causal_mask
(
input_shape
,
past_key_values_length
=
past_key_values_length
)
combined_attention_mask
=
_make_causal_mask
(
input_shape
,
past_key_values_length
=
past_key_values_length
)
else
:
else
:
...
@@ -1870,20 +1869,9 @@ class TFLEDDecoder(tf.keras.layers.Layer):
...
@@ -1870,20 +1869,9 @@ class TFLEDDecoder(tf.keras.layers.Layer):
tf
.
ones
((
input_shape
[
0
],
input_shape
[
1
]
+
past_key_values_length
)),
tgt_len
=
input_shape
[
-
1
]
tf
.
ones
((
input_shape
[
0
],
input_shape
[
1
]
+
past_key_values_length
)),
tgt_len
=
input_shape
[
-
1
]
)
)
if
inputs
[
"attention_mask"
]
is
None
and
inputs
[
"input_ids"
]
is
not
None
and
input_shape
[
-
1
]
>
1
:
if
inputs
[
"attention_mask"
]
is
not
None
and
input_shape
[
-
1
]
>
1
:
inputs
[
"attention_mask"
]
=
tf
.
cast
(
combined_attention_mask
=
combined_attention_mask
+
_expand_mask
(
tf
.
math
.
not_equal
(
inputs
[
"input_ids"
],
self
.
config
.
pad_token_id
),
inputs
[
"input_ids"
].
dtype
inputs
[
"attention_mask"
],
tgt_len
=
input_shape
[
-
1
]
)
inputs
[
"attention_mask"
]
=
tf
.
concat
(
[
tf
.
ones
((
input_shape
[
0
],
past_key_values_length
),
dtype
=
inputs
[
"attention_mask"
].
dtype
),
inputs
[
"attention_mask"
],
],
axis
=-
1
,
)
else
:
inputs
[
"attention_mask"
]
=
tf
.
ones
(
(
input_shape
[
0
],
input_shape
[
1
]
+
past_key_values_length
),
dtype
=
tf
.
int32
)
)
if
inputs
[
"encoder_hidden_states"
]
is
not
None
and
inputs
[
"encoder_attention_mask"
]
is
not
None
:
if
inputs
[
"encoder_hidden_states"
]
is
not
None
and
inputs
[
"encoder_attention_mask"
]
is
not
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment