chenpangpang / transformers / Commits / bd262158
"vscode:/vscode.git/clone" did not exist on "851bb87ca97cb261324ef418ca00a66cff2c9c91"
Unverified commit bd262158, authored Oct 01, 2020 by Patrick von Platen, committed by GitHub on Oct 01, 2020.
fix data type (#7513)
Parent: 62f5ae68
Showing 1 changed file with 10 additions and 3 deletions.
src/transformers/modeling_utils.py (+10 / -3)
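The hunk below moves the dtype cast of the causal mask so it happens before the prefix ones mask is concatenated, and creates the ones tensor with an explicit `dtype=causal_mask.dtype`. Previously `torch.ones` produced its default float32 tensor while the comparison-built causal mask was boolean, so the two inputs to `torch.cat` had mismatched types.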
```diff
@@ -238,13 +238,20 @@ class ModuleUtilsMixin:
                 seq_ids = torch.arange(seq_length, device=device)
                 causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
                 # in case past_key_values are used we need to add a prefix ones mask to the causal mask
+                # causal and attention masks must have same type with pytorch version < 1.3
+                causal_mask = causal_mask.to(attention_mask.dtype)
+
                 if causal_mask.shape[1] < attention_mask.shape[1]:
                     prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1]
                     causal_mask = torch.cat(
-                        [torch.ones((batch_size, seq_length, prefix_seq_len), device=device), causal_mask], axis=-1
+                        [
+                            torch.ones(
+                                (batch_size, seq_length, prefix_seq_len), device=device, dtype=causal_mask.dtype
+                            ),
+                            causal_mask,
+                        ],
+                        axis=-1,
+
                 extended_attention_mask = causal_mask[:, None, :, :] * attention_mask[:, None, None, :]
             else:
```
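To see the fix in isolation: below is a minimal, self-contained sketch of the patched code path, assuming a recent PyTorch. The helper name `build_extended_causal_mask` and the example shapes are made up for illustration and are not part of the transformers API; the body mirrors the post-fix hunk above.

```python
import torch


def build_extended_causal_mask(attention_mask: torch.Tensor, input_shape, device):
    """Illustrative mirror of the post-fix logic in ModuleUtilsMixin."""
    batch_size, seq_length = input_shape
    seq_ids = torch.arange(seq_length, device=device)
    # Lower-triangular mask: query position i may attend to key positions <= i.
    # The comparison yields a boolean tensor.
    causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
    # The fix: cast to the attention mask's dtype *before* concatenation, so the
    # ones prefix below can be created with a matching dtype (per the source
    # comment, PyTorch < 1.3 required the masks to share a type).
    causal_mask = causal_mask.to(attention_mask.dtype)

    if causal_mask.shape[1] < attention_mask.shape[1]:
        # past_key_values case: prepend a ones block so cached positions stay visible.
        prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1]
        causal_mask = torch.cat(
            [
                torch.ones((batch_size, seq_length, prefix_seq_len), device=device, dtype=causal_mask.dtype),
                causal_mask,
            ],
            axis=-1,
        )

    # Broadcast to [batch_size, 1, query_length, key_length].
    return causal_mask[:, None, :, :] * attention_mask[:, None, None, :]


# Example: batch of 2, 5 current tokens, 2 cached (past) positions -> key length 7.
attention_mask = torch.ones(2, 7)
extended = build_extended_causal_mask(attention_mask, input_shape=(2, 5), device=attention_mask.device)
print(extended.shape)  # torch.Size([2, 1, 5, 7])
```

Before this change, `torch.cat` received a default-dtype (float32) ones tensor together with the boolean causal mask, and the cast to `attention_mask.dtype` only happened after concatenation; casting first ensures both concatenation inputs share a dtype on the older PyTorch versions the source comment refers to.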