Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
7d4ad51e
Commit
7d4ad51e
authored
Sep 29, 2020
by
Vijay Korthikanti
Browse files
bug fix
parent
37ae6646
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
9 deletions
+9
-9
megatron/model/transformer.py
megatron/model/transformer.py
+9
-9
No files found.
megatron/model/transformer.py
View file @
7d4ad51e
...
@@ -197,23 +197,23 @@ class ParallelSelfAttention(MegatronModule):
...
@@ -197,23 +197,23 @@ class ParallelSelfAttention(MegatronModule):
# Query, Key, and Value
# Query, Key, and Value
# =====================
# =====================
# Attention heads [s, b, hp] --> [s, b,
3
*
hp
]
# Attention heads [s, b, hp] --> [s, b,
hp
*
3
]
mixed_x_layer
,
_
=
self
.
query_key_value
(
hidden_states
)
mixed_x_layer
,
_
=
self
.
query_key_value
(
hidden_states
)
if
self
.
old_checkpoint_format
:
if
self
.
old_checkpoint_format
:
self
.
_transpose_last_dim
(
mixed_x_layer
)
# [s, b, 3 * hp] --> [s, b, hp * 3]
mixed_x_layer
=
self
.
_transpose_last_dim
(
mixed_x_layer
)
# [s, b,
3
*
hp
] --> [s, b, np,
3 * hn
]
# [s, b,
hp
*
3
] --> [s, b, np,
hn, 3
]
new_tensor_shape
=
mixed_x_layer
.
size
()[:
-
1
]
+
\
new_tensor_shape
=
mixed_x_layer
.
size
()[:
-
1
]
+
\
(
self
.
num_attention_heads_per_partition
,
(
self
.
num_attention_heads_per_partition
,
3
*
self
.
hidden_size_per_attention_head
)
self
.
hidden_size_per_attention_head
,
3
)
mixed_x_layer
=
mixed_x_layer
.
view
(
*
new_tensor_shape
)
mixed_x_layer
=
mixed_x_layer
.
view
(
*
new_tensor_shape
)
# [s, b, np, 3 * hn] --> 3 [s, b, np, hn]
# [s, b, np, hn, 3] --> 3 [s, b, np, hn]
(
query_layer
,
query_layer
=
mixed_x_layer
[:,:,:,:,
0
]
key_layer
,
key_layer
=
mixed_x_layer
[:,:,:,:,
1
]
value_layer
)
=
mpu
.
split_tensor_along_last_dim
(
mixed_x_layer
,
3
)
value_layer
=
mixed_x_layer
[:,:,:,:,
2
]
# ==================================
# ==================================
# Adjust key and value for inference
# Adjust key and value for inference
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment