OpenDAS / Megatron-LM · Commits

Commit 84a5997a, authored Jul 29, 2020 by Boris Fomitchev

Merge remote-tracking branch 'upstream/master' into onnx-erf

Parents: bb7c638f, 07ebf714

Showing 5 changed files with 22 additions and 5 deletions (+22, -5).
megatron/initialize.py         +6  -0
megatron/model/transformer.py  +1  -2
megatron/mpu/__init__.py       +1  -0
megatron/mpu/grads.py          +6  -2
megatron/mpu/layers.py         +8  -1
megatron/initialize.py

@@ -39,6 +39,12 @@ def initialize_megatron(extra_args_provider=None, args_defaults={},
     # Make sure cuda is available.
     assert torch.cuda.is_available(), 'Megatron requires CUDA.'
 
+    # This is temporary WAR to make simple case like pytest calling with same args twice
+    # Need to implement clean factory init.
+    if mpu.model_parallel_is_initialized():
+        return
+
     # Parse args, build tokenizer, and set adlr-autoresume,
     # tensorboard-writer, and timers.
     set_global_variables(extra_args_provider=extra_args_provider,
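The added guard makes initialize_megatron idempotent: a repeated call (for example, pytest invoking the same entry point twice with the same args) returns early instead of redoing model-parallel setup. A minimal sketch of the same pattern in a standalone toy module (names here are illustrative, not Megatron's actual API surface):

# Minimal sketch of the idempotent-init guard; Megatron's real check is
# mpu.model_parallel_is_initialized(), everything below is a stand-in.
_INITIALIZED = False

def model_parallel_is_initialized():
    return _INITIALIZED

def initialize_megatron_like(seed=1234):
    global _INITIALIZED
    # Second and later calls return early instead of redoing one-time setup.
    if model_parallel_is_initialized():
        return
    # ... expensive one-time setup (args parsing, process groups) would go here ...
    _INITIALIZED = True

initialize_megatron_like()  # performs setup
initialize_megatron_like()  # no-op on the repeated call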
megatron/model/transformer.py

@@ -16,12 +16,11 @@
 """Transformer."""
 
 import math
 
 import torch
-from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
 
 from megatron import get_args
 from megatron import mpu
+from megatron.mpu import LayerNorm
 from megatron.module import MegatronModule
megatron/mpu/__init__.py

@@ -32,6 +32,7 @@ from .initialize import get_model_parallel_world_size
 from .initialize import initialize_model_parallel
 from .initialize import model_parallel_is_initialized
+from .layers import LayerNorm
 from .layers import ColumnParallelLinear
 from .layers import ParallelEmbedding
 from .layers import RowParallelLinear
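Together with the transformer.py change above, this re-export makes megatron.mpu the single place that decides which LayerNorm implementation is in use; consumers never name apex directly. A rough three-file sketch of that facade pattern, using an illustrative package name rather than Megatron's real tree:

# mypkg/layers.py -- choose the implementation once, at import time
try:
    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
except Exception:
    from torch.nn import LayerNorm

# mypkg/__init__.py -- re-export so callers never import apex directly
from .layers import LayerNorm

# mypkg/model.py -- downstream code stays backend-agnostic
from mypkg import LayerNorm
norm = LayerNorm(1024, eps=1e-5)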
megatron/mpu/grads.py

@@ -21,8 +21,12 @@
 import torch
 from torch._six import inf
 
-from apex.multi_tensor_apply import multi_tensor_applier
-import amp_C
+try:
+    from apex.multi_tensor_apply import multi_tensor_applier
+    import amp_C
+except Exception as e:
+    print('WARNING: APEX is not installed, multi_tensor_applier will not be available.')
+
 
 from .initialize import get_model_parallel_group
 from .initialize import get_model_parallel_rank
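With this change grads.py only warns when Apex is missing, so code that relies on multi_tensor_applier also needs a pure-PyTorch path. A hedged sketch of how such a fallback could look (the HAVE_APEX flag and the fallback branch are illustrative, not the file's actual code):

import torch

# Illustrative availability flag; the actual grads.py just prints a warning
# when the Apex import fails.
try:
    from apex.multi_tensor_apply import multi_tensor_applier
    import amp_C
    HAVE_APEX = True
except Exception:
    HAVE_APEX = False

def l2_grad_norm(parameters):
    """L2 norm over all gradients, with a pure-PyTorch fallback path."""
    grads = [p.grad for p in parameters if p.grad is not None]
    if HAVE_APEX and grads and grads[0].is_cuda:
        # Apex fuses the per-tensor norms into a handful of CUDA kernel launches.
        overflow_buf = torch.zeros(1, dtype=torch.int, device=grads[0].device)
        norm, _ = multi_tensor_applier(amp_C.multi_tensor_l2norm,
                                       overflow_buf, [grads], False)
        return norm.item()
    # Fallback: per-tensor norms reduced with plain torch ops.
    return torch.norm(torch.stack([torch.norm(g, 2) for g in grads]), 2).item()

params = [torch.nn.Parameter(torch.randn(4, 4)) for _ in range(3)]
loss = sum((p ** 2).sum() for p in params)
loss.backward()
print(l2_grad_norm(params))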
megatron/mpu/layers.py

@@ -25,7 +25,14 @@ import torch.nn.functional as F
 import torch.nn.init as init
 from torch.nn.parameter import Parameter
 
-from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
+try:
+    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
+    # Try to use FusedLayerNorm from Apex - this will trigger an error.
+    _ = LayerNorm(8, eps=1e-5)
+except Exception as e:
+    print('WARNING: APEX is not installed, using torch.nn.LayerNorm instead of apex.normalization.FusedLayerNorm!')
+    from torch.nn import LayerNorm
+
 
 from .initialize import get_model_parallel_rank
 from .initialize import get_model_parallel_world_size
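The probe _ = LayerNorm(8, eps=1e-5) is what makes this fallback robust: Apex can import cleanly and still fail only when its fused CUDA extension is first used, so instantiating a throwaway layer inside the try block surfaces that failure early and routes callers to torch.nn.LayerNorm. A small, hedged reproduction of the pattern (the backend variable and the final demo lines are illustrative):

import torch

try:
    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
    # Apex may import fine yet fail when the fused extension is first used,
    # so build a throwaway layer now to trigger any such error inside the try.
    _ = LayerNorm(8, eps=1e-5)
    backend = 'apex FusedLayerNorm'
except Exception:
    from torch.nn import LayerNorm
    backend = 'torch.nn.LayerNorm'

# Both backends share the constructor and forward signature, so the rest of
# the code is unchanged regardless of which one was selected.
norm = LayerNorm(8, eps=1e-5)
print(backend, norm(torch.randn(2, 8)).shape)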