Commit 707b12a3 (unverified), authored Nov 04, 2022 by H. Jhoo, committed by GitHub Nov 04, 2022

change constant torch.tensor to torch.full (#20061)

parent 787620e2
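The change swaps `torch.tensor` for `torch.full` when building 0-dimensional constant tensors. Both calls produce a scalar tensor with the requested dtype and device, so the refactor is value-preserving. A minimal sketch of the equivalence (shapes and sample values here are illustrative, not taken from the commit):

```python
import torch

x = torch.randn(2, 4, 8, 8)  # stand-in for attn_weights

# Old pattern: wrap a Python scalar with torch.tensor
old = torch.tensor(64 ** 0.5, dtype=x.dtype, device=x.device)

# New pattern: torch.full with an empty size list yields the same 0-dim tensor
new = torch.full([], 64 ** 0.5, dtype=x.dtype, device=x.device)

assert old.shape == new.shape == torch.Size([])
assert torch.equal(old, new)
print(x / new)  # a 0-dim tensor broadcasts, scaling every element
```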
Showing 2 changed files with 6 additions and 6 deletions:

src/transformers/models/decision_transformer/modeling_decision_transformer.py (+3, -3)
src/transformers/models/gpt2/modeling_gpt2.py (+3, -3)
src/transformers/models/decision_transformer/modeling_decision_transformer.py
```diff
@@ -170,8 +170,8 @@ class DecisionTransformerGPT2Attention(nn.Module):
         attn_weights = torch.matmul(query, key.transpose(-1, -2))
 
         if self.scale_attn_weights:
-            attn_weights = attn_weights / torch.tensor(
-                value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
-            )
+            attn_weights = attn_weights / torch.full(
+                [], value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
+            )
 
         # Layer-wise attention scaling
```
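For context, this hunk is the step that divides the raw attention logits by sqrt(d_k). A standalone sketch of that step, with `scale_attn` as a hypothetical helper name (not from the commit):

```python
import torch

def scale_attn(attn_weights: torch.Tensor, head_dim: int) -> torch.Tensor:
    # Hypothetical helper: divide logits by sqrt(head_dim) using a 0-dim
    # constant that matches attn_weights in dtype and device.
    scale = torch.full([], head_dim ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device)
    return attn_weights / scale

attn_weights = torch.randn(1, 2, 8, 8)  # (batch, heads, query, key)
scaled = scale_attn(attn_weights, head_dim=64)
```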
```diff
@@ -185,7 +185,7 @@ class DecisionTransformerGPT2Attention(nn.Module):
             mask_value = torch.finfo(attn_weights.dtype).min
             # Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
             # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
-            mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
+            mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
             attn_weights = torch.where(causal_mask, attn_weights, mask_value)
 
         if attention_mask is not None:
```
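The second hunk changes only how the 0-dim `mask_value` constant is built; the masking logic is unchanged. A self-contained sketch of that pattern, with `apply_causal_mask` as a hypothetical helper (the model itself indexes a precomputed `self.bias` buffer rather than building the mask inline):

```python
import torch

def apply_causal_mask(attn_weights: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: replace future positions with the most negative
    # representable value so softmax assigns them ~zero probability.
    query_length, key_length = attn_weights.size(-2), attn_weights.size(-1)
    causal_mask = torch.ones(
        query_length, key_length, dtype=torch.bool, device=attn_weights.device
    ).tril()
    mask_value = torch.finfo(attn_weights.dtype).min
    mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
    return torch.where(causal_mask, attn_weights, mask_value)

masked = apply_causal_mask(torch.randn(1, 2, 8, 8))
```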
src/transformers/models/gpt2/modeling_gpt2.py
```diff
@@ -182,8 +182,8 @@ class GPT2Attention(nn.Module):
         attn_weights = torch.matmul(query, key.transpose(-1, -2))
 
         if self.scale_attn_weights:
-            attn_weights = attn_weights / torch.tensor(
-                value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
-            )
+            attn_weights = attn_weights / torch.full(
+                [], value.size(-1) ** 0.5, dtype=attn_weights.dtype, device=attn_weights.device
+            )
 
         # Layer-wise attention scaling
```
```diff
@@ -197,7 +197,7 @@ class GPT2Attention(nn.Module):
             mask_value = torch.finfo(attn_weights.dtype).min
             # Need to be a tensor, otherwise we get error: `RuntimeError: expected scalar type float but found double`.
             # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
-            mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
+            mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
             attn_weights = torch.where(causal_mask, attn_weights, mask_value)
 
         if attention_mask is not None:
```
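Since the GPT2Attention hunks mirror the DecisionTransformerGPT2Attention ones exactly, a quick smoke test exercising the changed code path could look like the following (tiny, randomly initialized config; purely illustrative, not part of the commit):

```python
import torch
from transformers import GPT2Config, GPT2Model

config = GPT2Config(n_layer=2, n_head=2, n_embd=64)  # tiny model for speed
model = GPT2Model(config).eval()

input_ids = torch.randint(0, config.vocab_size, (1, 8))
with torch.no_grad():
    hidden = model(input_ids).last_hidden_state  # forward pass runs GPT2Attention._attn
print(hidden.shape)  # torch.Size([1, 8, 64])
```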