Unverified Commit 3ce531c9 authored by Jeff Rasley, committed by GitHub

Upgrade apex version, turn off legacy fusion (#205)

* update apex version to feb 5th commit

* use gradient clipping instead of max grad norm in tests

* add warning when user provides max_grad_norm

* update examples commit
parent f8fa1325
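The second and third commit-message bullets above amount to a config migration: gradient clipping moves out of the optimizer's params (the apex bump appears to drop the old fused-optimizer interface that accepted max_grad_norm) and into DeepSpeed's top-level gradient_clipping key. A minimal before/after sketch as Python dicts, mirroring the JSON test configs changed below; the dict names are illustrative only, not from this commit:

# Old style: clipping configured on the optimizer, now rejected with a warning.
old_style_config = {
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": 3e-5,
            "max_grad_norm": 1.0,   # no longer supported for fp16 optimizers
            "weight_decay": 0.0,
            "bias_correction": False,
        },
    },
    "fp16": {"enabled": True},
}

# New style: clipping handled by DeepSpeed via the top-level key.
new_style_config = {
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": 3e-5,
            "weight_decay": 0.0,
            "bias_correction": False,
        },
    },
    "gradient_clipping": 1.0,
    "fp16": {"enabled": True},
}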
-Subproject commit 6a698b2185dc332d1a35e2353b55a38899a36ef3
+Subproject commit 9e2c735f5aabe48395c03a276fa7a0c51f6d3025
@@ -25,7 +25,7 @@ OPTIMIZER_TYPE_DEFAULT = None
 OPTIMIZER_PARAMS = "params"
 TYPE = "type"
 LEGACY_FUSION = "legacy_fusion"
-LEGACY_FUSION_DEFAULT = True
+LEGACY_FUSION_DEFAULT = False
 SCHEDULER = "scheduler"
 SCHEDULER_TYPE_DEFAULT = None
 SCHEDULER_PARAMS = "params"
...
@@ -5,6 +5,7 @@ Copyright 2019 The Microsoft DeepSpeed Team
 import logging
 import torch
 import os
+import warnings
 import torch.distributed as dist
 from torch.nn.modules import Module
@@ -465,6 +466,9 @@ class DeepSpeedLight(Module):
     def _configure_basic_optimizer(self, model_parameters):
         optimizer_parameters = self.optimizer_params()
         if self.fp16_enabled() and 'max_grad_norm' in optimizer_parameters.keys():
+            warnings.warn(
+                "'max_grad_norm' is not supported as an optimizer parameter, please switch to using the deepspeed parameter 'gradient_clipping' see: https://www.deepspeed.ai/docs/config-json/#gradient-clipping for more details"
+            )
             optimizer_parameters['max_grad_norm'] = 0.0
         if self.optimizer_name() == ADAM_OPTIMIZER:
             from apex.optimizers.fused_adam import FusedAdam
...
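For illustration, the guard added above can be read as a standalone helper; a hedged sketch, assuming a plain dict in place of self.optimizer_params() and a boolean in place of self.fp16_enabled() (the helper name here is hypothetical, not from this commit). It shows that a leftover 'max_grad_norm' only triggers the warning and is then zeroed, so clipping has to be re-enabled via 'gradient_clipping':

import warnings

def scrub_max_grad_norm(optimizer_parameters, fp16_enabled=True):
    # Mirrors the check in _configure_basic_optimizer above.
    if fp16_enabled and 'max_grad_norm' in optimizer_parameters:
        warnings.warn(
            "'max_grad_norm' is not supported as an optimizer parameter, "
            "please switch to the deepspeed parameter 'gradient_clipping'"
        )
        # Force 0.0 so the apex optimizer does not clip on its own.
        optimizer_parameters['max_grad_norm'] = 0.0
    return optimizer_parameters

params = scrub_max_grad_norm({"lr": 3e-5, "max_grad_norm": 1.0})
assert params["max_grad_norm"] == 0.0   # clipping now comes from "gradient_clipping"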
@@ -14,11 +14,11 @@
     "type": "Adam",
     "params": {
       "lr": 3e-5,
-      "max_grad_norm": 1.0,
       "weight_decay": 0.0,
       "bias_correction": false
     }
   },
+  "gradient_clipping": 1.0,
   "fp16": {
     "enabled": true
   }
...
@@ -6,11 +6,11 @@
     "type": "Adam",
     "params": {
       "lr": 3e-5,
-      "max_grad_norm": 1.0,
       "weight_decay": 0.0,
       "bias_correction": false
     }
   },
+  "gradient_clipping": 1.0,
   "fp16": {
     "enabled": false
   }
...
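Both JSON hunks above are unit-test configs; with gradient_clipping set there, nothing clipping-related needs to be passed to the optimizer itself. A hedged sketch of how such a config file is typically consumed, with the model and argument plumbing as placeholders rather than code from this commit:

import argparse
import deepspeed
import torch

parser = argparse.ArgumentParser()
# Adds --deepspeed and --deepspeed_config so a JSON config like the ones above can be passed in.
parser = deepspeed.add_config_arguments(parser)
args = parser.parse_args()

model = torch.nn.Linear(10, 10)   # placeholder model
engine, optimizer, _, _ = deepspeed.initialize(
    args=args,
    model=model,
    model_parameters=model.parameters(),
)
# Gradient clipping at 1.0 is now applied by the engine, per "gradient_clipping".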
-Subproject commit 880ab925bce9f817a93988b021e12db5f67f7787
+Subproject commit 494f8ab3fc1b0b26949a3bcbb2bcac78008d48c1