Commit bf97f933 authored by Jared Casper

Merge branch 'apex-fix' into 'master'

Initialization fixes: allow simple cases, like pytest, to pass, and make apex optional

See merge request ADLR/megatron-lm!99
parents 05620ee4 b04eb0a2
@@ -39,6 +39,12 @@ def initialize_megatron(extra_args_provider=None, args_defaults={},
     # Make sure cuda is available.
     assert torch.cuda.is_available(), 'Megatron requires CUDA.'
 
+    # Temporary WAR so that simple cases, such as pytest calling
+    # initialize_megatron twice with the same args, pass.
+    # A clean factory-style init still needs to be implemented.
+    if mpu.model_parallel_is_initialized():
+        return
+
     # Parse args, build tokenizer, and set adlr-autoresume,
     # tensorboard-writer, and timers.
     set_global_variables(extra_args_provider=extra_args_provider,
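For context, the guard above is the standard idempotent-initialization pattern. Here is a minimal self-contained sketch of the same idea; the `_initialized` flag and the `setup()` name are illustrative, not Megatron APIs:

```python
_initialized = False

def setup():
    """Safe to call repeatedly, e.g. once per pytest test case."""
    global _initialized
    if _initialized:  # plays the role of mpu.model_parallel_is_initialized()
        return
    # ... expensive one-time work: process groups, global state, ...
    _initialized = True

setup()
setup()  # second call returns early instead of failing
```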
@@ -18,7 +18,14 @@
 import math
 import torch
 
-from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
+try:
+    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
+    # Instantiate once so a broken Apex install fails here, not at first use.
+    _ = LayerNorm(8, eps=1e-5)
+except Exception:
+    print('WARNING: APEX is not available, using torch.nn.LayerNorm '
+          'instead of apex.normalization.FusedLayerNorm!')
+    from torch.nn import LayerNorm
 
 from megatron import get_args
 from megatron import mpu
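The second hunk is the usual optional-dependency fallback. Below is a standalone sketch that runs with or without Apex installed; the probe size 8 mirrors the diff, the rest is illustrative:

```python
import torch

try:
    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
    # Constructing one module fails fast if Apex's CUDA extension is
    # missing or was built against an incompatible torch version.
    _ = LayerNorm(8, eps=1e-5)
except Exception:
    # Fallback has the same constructor signature, so callers are unchanged.
    from torch.nn import LayerNorm

norm = LayerNorm(1024, eps=1e-5)
print(type(norm).__name__, norm(torch.randn(2, 1024)).shape)
```

One design note: because the probe catches bare `Exception`, a genuinely broken Apex build is silently downgraded to `torch.nn.LayerNorm`, so the printed warning is the only signal that the fused kernel is not in use.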