Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
83671bbf
Commit
83671bbf
authored
Dec 21, 2020
by
mshoeybi
Committed by
Deepak Narayanan
Dec 22, 2020
Browse files
Address Deepak's comments
parent
8bed1d63
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
1 deletion
+3
-1
megatron/arguments.py
megatron/arguments.py
+1
-1
megatron/model/transformer.py
megatron/model/transformer.py
+2
-0
No files found.
megatron/arguments.py
View file @
83671bbf
...
@@ -185,7 +185,7 @@ def parse_args(extra_args_provider=None, defaults={},
...
@@ -185,7 +185,7 @@ def parse_args(extra_args_provider=None, defaults={},
assert
args
.
fp16
,
'lm cross entropy in fp16 only support in fp16 mode.'
assert
args
.
fp16
,
'lm cross entropy in fp16 only support in fp16 mode.'
if
args
.
fp32_residual_connection
:
if
args
.
fp32_residual_connection
:
assert
args
.
fp16
,
\
assert
args
.
fp16
,
\
'residual connection in fp32 only supports
in fp16
mode
.'
'residual connection in fp32 only support
ed when u
sin
g
fp16.'
# Activation checkpointing.
# Activation checkpointing.
if
args
.
distribute_checkpointed_activations
:
if
args
.
distribute_checkpointed_activations
:
assert
args
.
checkpoint_activations
,
\
assert
args
.
checkpoint_activations
,
\
...
...
megatron/model/transformer.py
View file @
83671bbf
...
@@ -568,8 +568,10 @@ class ParallelTransformer(MegatronModule):
...
@@ -568,8 +568,10 @@ class ParallelTransformer(MegatronModule):
if
mpu
.
is_pipeline_first_stage
():
if
mpu
.
is_pipeline_first_stage
():
# Data format change to avoid explicit tranposes : [b s h] --> [s b h].
# Data format change to avoid explicit tranposes : [b s h] --> [s b h].
# If the input flag for fp32 residual connection is set, convert for float.
if
self
.
fp32_residual_connection
:
if
self
.
fp32_residual_connection
:
hidden_states
=
hidden_states
.
transpose
(
0
,
1
).
contiguous
().
float
()
hidden_states
=
hidden_states
.
transpose
(
0
,
1
).
contiguous
().
float
()
# Otherwise, leave it as is.
else
:
else
:
hidden_states
=
hidden_states
.
transpose
(
0
,
1
).
contiguous
()
hidden_states
=
hidden_states
.
transpose
(
0
,
1
).
contiguous
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment