OpenDAS / Megatron-LM

Commit df521589, authored Apr 03, 2023 by Mostofa Patwary
Parent: 1e0e555c

    addressed the comments

Showing 1 changed file with 3 additions and 4 deletions.

megatron/model/transformer.py (+3, -4)
@@ -486,7 +486,7 @@ class ParallelAttention(MegatronModule):
         # =================================================
         # Pre-allocate memory for key-values for inference.
         # =================================================
+        is_first_step = False
         if inference_params:
             if self.layer_number not in inference_params.key_value_memory_dict:
                 inf_max_seq_len = inference_params.max_sequence_len
@@ -497,6 +497,7 @@ class ParallelAttention(MegatronModule):
                     inf_max_seq_len, inf_max_batch_size)
                 inference_params.key_value_memory_dict[self.layer_number] = (
                     inference_key_memory, inference_value_memory)
+                is_first_step = True
             else:
                 inference_key_memory, inference_value_memory = \
                     inference_params.key_value_memory_dict[self.layer_number]
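The two hunks above add an is_first_step flag around the lazily pre-allocated key/value cache: each attention layer allocates its cache entry in inference_params.key_value_memory_dict on its first inference call and reuses it afterwards. Below is a minimal, self-contained sketch of that pattern; SimpleInferenceParams, SketchAttention, num_heads and head_dim are illustrative stand-ins, not the real Megatron-LM classes or their exact tensor layout.

# Sketch of the is_first_step / KV-cache pre-allocation pattern shown in the
# diff above. Names and shapes are illustrative, not Megatron's real API.
import torch


class SimpleInferenceParams:
    def __init__(self, max_sequence_len, max_batch_size):
        self.max_sequence_len = max_sequence_len
        self.max_batch_size = max_batch_size
        # Maps layer_number -> (key_memory, value_memory), as in the diff.
        self.key_value_memory_dict = {}


class SketchAttention:
    def __init__(self, layer_number, num_heads=8, head_dim=64):
        self.layer_number = layer_number
        self.num_heads = num_heads
        self.head_dim = head_dim

    def _allocate_memory(self, max_seq_len, max_batch_size):
        # Loosely [seq, batch, heads, head_dim]; the real layout may differ.
        return torch.empty(max_seq_len, max_batch_size,
                           self.num_heads, self.head_dim)

    def forward(self, inference_params=None):
        is_first_step = False
        if inference_params:
            if self.layer_number not in inference_params.key_value_memory_dict:
                # First call for this layer: allocate the full-size cache once.
                inf_max_seq_len = inference_params.max_sequence_len
                inf_max_batch_size = inference_params.max_batch_size
                inference_key_memory = self._allocate_memory(
                    inf_max_seq_len, inf_max_batch_size)
                inference_value_memory = self._allocate_memory(
                    inf_max_seq_len, inf_max_batch_size)
                inference_params.key_value_memory_dict[self.layer_number] = (
                    inference_key_memory, inference_value_memory)
                is_first_step = True
            else:
                # Later decoding steps: reuse the pre-allocated buffers.
                inference_key_memory, inference_value_memory = \
                    inference_params.key_value_memory_dict[self.layer_number]
        return is_first_step


params = SimpleInferenceParams(max_sequence_len=128, max_batch_size=4)
layer = SketchAttention(layer_number=1)
print(layer.forward(params))  # True  (cache allocated on the first step)
print(layer.forward(params))  # False (cache reused on subsequent steps)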
@@ -741,14 +742,12 @@ class ParallelTransformerLayer(MegatronModule):
         layernorm_output = self.input_layernorm(hidden_states)
 
         # Self attention.
         self_attention_pos_emb = None
-        if rotary_pos_emb is not None:
-            self_attention_pos_emb = rotary_pos_emb
         attention_output, attention_bias = \
             self.self_attention(
                 layernorm_output,
                 attention_mask,
                 inference_params=inference_params,
-                rotary_pos_emb=self_attention_pos_emb)
+                rotary_pos_emb=rotary_pos_emb)
 
         # Residual connection.
         if self.apply_residual_connection_post_layernorm:
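The last hunk drops the intermediate self_attention_pos_emb copy and passes rotary_pos_emb straight into self_attention, leaving the None case to the attention module. A minimal sketch of that call path follows; SketchTransformerLayer and SketchSelfAttention are hypothetical stand-ins for the real Megatron modules.

# Sketch of the simplified call path from the hunk above: the layer forwards
# rotary_pos_emb directly instead of staging it in a local variable.
import torch
import torch.nn as nn


class SketchSelfAttention(nn.Module):
    def forward(self, hidden_states, attention_mask,
                inference_params=None, rotary_pos_emb=None):
        # A real implementation would apply rotary embeddings to the query
        # and key projections whenever rotary_pos_emb is not None; this
        # sketch only threads the argument through.
        return hidden_states, None  # (attention_output, attention_bias)


class SketchTransformerLayer(nn.Module):
    def __init__(self, hidden_size=16):
        super().__init__()
        self.input_layernorm = nn.LayerNorm(hidden_size)
        self.self_attention = SketchSelfAttention()

    def forward(self, hidden_states, attention_mask=None,
                inference_params=None, rotary_pos_emb=None):
        layernorm_output = self.input_layernorm(hidden_states)

        # Pass rotary_pos_emb straight through to self-attention, mirroring
        # the post-change call in the diff.
        attention_output, attention_bias = \
            self.self_attention(
                layernorm_output,
                attention_mask,
                inference_params=inference_params,
                rotary_pos_emb=rotary_pos_emb)
        return attention_output, attention_bias


layer = SketchTransformerLayer()
x = torch.randn(4, 2, 16)  # [sequence, batch, hidden]
out, _ = layer(x)  # without rotary embeddings
out, _ = layer(x, rotary_pos_emb=torch.randn(4, 1, 1, 8))  # with them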