OpenDAS / Megatron-LM

Commit 5ed304e4
Authored Apr 05, 2023 by Mostofa Patwary
Parent: 0760822b

addressing the comments
Showing 4 changed files with 3 additions and 36 deletions (+3, -36)
megatron/arguments.py               +1   -1
megatron/model/__init__.py          +0   -1
megatron/model/fused_layer_norm.py  +2  -28
megatron/model/transformer.py       +0   -6
megatron/arguments.py

@@ -515,7 +515,7 @@ def _add_network_size_args(parser):
     group.add_argument('--layernorm-epsilon', type=float, default=1e-5,
                        help='Layer norm epsilon.')
     group.add_argument('--apply-layernorm-1p', action='store_true',
-                       help='Use layernorm 1p')
+                       help='Weight adjustment centered around zero.')
     group.add_argument('--apply-residual-connection-post-layernorm',
                        action='store_true',
                        help='If set, use original BERT residula connection '
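The updated help string summarizes the behavior documented in the code removed further down: with --apply-layernorm-1p the LayerNorm scale parameter is stored centered around zero and applied as weight + 1. As a hedged illustration of how the two options in this hunk behave, here is a standalone argparse sketch (not the full Megatron-LM parser; the real flags live in _add_network_size_args):

import argparse

# Minimal sketch of the two options shown above.
parser = argparse.ArgumentParser()
group = parser.add_argument_group(title='network size')
group.add_argument('--layernorm-epsilon', type=float, default=1e-5,
                   help='Layer norm epsilon.')
group.add_argument('--apply-layernorm-1p', action='store_true',
                   help='Weight adjustment centered around zero.')

args = parser.parse_args(['--apply-layernorm-1p'])
assert args.layernorm_epsilon == 1e-5
assert args.apply_layernorm_1p is True  # store_true: defaults to False when omitted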
megatron/model/__init__.py

 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

 from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm
-#from .fused_layer_norm import MixedFusedLayerNorm1P as LayerNorm1P

 from .distributed import DistributedDataParallel
 from .bert_model import BertModel
megatron/model/fused_layer_norm.py

@@ -126,29 +126,3 @@ class MixedFusedLayerNorm(torch.nn.Module):
                                        keep_graph = True)

         return output
-
-
-#class MixedFusedLayerNorm1P(MixedFusedLayerNorm):
-#    def reset_parameters(self):
-#        init.zeros_(self.weight)
-#        init.zeros_(self.bias)
-#
-#    def forward(self, input):
-#
-#        if self.no_persist_layer_norm:
-#            return FusedLayerNormAffineFunction.apply(
-#                input, self.weight + 1, self.bias, self.normalized_shape, self.eps)
-#        else:
-#            output = FastLayerNormFN.apply(
-#                input, self.weight + 1, self.bias, self.eps)
-#
-#            # Apex's fast layer norm function outputs a 'view' tensor (i.e., has
-#            # a populated '_base' field). This will result in schedule.py's
-#            # deallocate_output_tensor() throwing an error, so a viewless tensor is
-#            # created to prevent this.
-#            output = make_viewless_tensor(inp = output,
-#                                          requires_grad = input.requires_grad,
-#                                          keep_graph = True)
-#
-#            return output
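The deleted comment block is where the '1p' variant was prototyped: reset_parameters() zero-initializes the scale and bias, and forward() applies weight + 1, so the stored scale parameter is centered around zero (matching the new help text) while the effective scale starts at one. A rough, unfused sketch of the same idea in plain PyTorch, for illustration only (Megatron-LM's MixedFusedLayerNorm uses the Apex fused kernels referenced above):

import torch
import torch.nn.functional as F


class LayerNorm1P(torch.nn.Module):
    """Plain-PyTorch sketch of 'layernorm 1p': the weight is stored centered at zero."""

    def __init__(self, hidden_size, eps=1e-5):
        super().__init__()
        self.eps = eps
        self.normalized_shape = (hidden_size,)
        # Effective scale is weight + 1, so the stored parameter starts at zero.
        self.weight = torch.nn.Parameter(torch.zeros(hidden_size))
        self.bias = torch.nn.Parameter(torch.zeros(hidden_size))

    def forward(self, x):
        return F.layer_norm(x, self.normalized_shape,
                            self.weight + 1, self.bias, self.eps)


x = torch.randn(2, 4, 8)
# At initialization this matches a standard LayerNorm (scale 1, bias 0).
torch.testing.assert_close(LayerNorm1P(8)(x), F.layer_norm(x, (8,), eps=1e-5))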
megatron/model/transformer.py

@@ -638,9 +638,6 @@ class ParallelTransformerLayer(MegatronModule):
         apply_layernorm_1p = False
         if args.apply_layernorm_1p:
             apply_layernorm_1p = True
-            #from megatron.model import LayerNorm1P as LayerNorm
-        #else:
-        #    from megatron.model import LayerNorm

         # Layernorm on the input data.
         self.input_layernorm = LayerNorm(

@@ -1033,9 +1030,6 @@ class ParallelTransformer(MegatronModule):
         apply_layernorm_1p = False
         if args.apply_layernorm_1p:
             apply_layernorm_1p = True
-            #from megatron.model import LayerNorm1P as LayerNorm
-        #else:
-        #    from megatron.model import LayerNorm

         if self.post_process and self.post_layer_norm:
             # Final layer norm before output.
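Both hunks drop the commented-out import swap; the apply_layernorm_1p boolean computed just above is presumably forwarded into the truncated LayerNorm(...) constructor call instead. As a self-contained sketch of that dispatch, reusing the illustrative LayerNorm1P class from the previous example and torch.nn.LayerNorm for the default path (this two-class switch and its keyword names are assumptions, not taken from this diff):

import torch

def build_layernorm(hidden_size, eps, apply_layernorm_1p):
    # Illustrative dispatch only: the actual transformer.py passes the flag
    # into the single Apex-backed MixedFusedLayerNorm rather than switching classes.
    if apply_layernorm_1p:
        return LayerNorm1P(hidden_size, eps=eps)  # sketch class defined above
    return torch.nn.LayerNorm(hidden_size, eps=eps)

# Mirrors the boolean set up in ParallelTransformerLayer.__init__ above.
apply_layernorm_1p = True
input_layernorm = build_layernorm(1024, 1e-5, apply_layernorm_1p)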