"src/diffusers/quantizers/quantization_config.py" did not exist on "24281f8036761e786303cdff954c85354e0e16ce"
Commit 43529f78 authored by mohammad

Addressed Deepak's comments

parent 242770dd
@@ -103,7 +103,7 @@ class DynamicGradScaler(MegatronGradScaler):
         if found_inf:
             self._growth_tracker = 0
             self._hysteresis_tracker -= 1
-            # Now if we are our of hysteresis count, scale down the loss.
+            # Now if we are out of hysteresis count, scale down the loss.
             if self._hysteresis_tracker <= 0:
                 self._scale = torch.max(self._scale * self.backoff_factor,
                                         self.min_scale)
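The comment fixed above describes the hysteresis mechanism: the loss scale is only backed off once several consecutive overflow steps have exhausted the hysteresis budget, while overflow-free steps eventually grow it again. A minimal standalone sketch of that update rule (class name, defaults, and the growth branch are illustrative assumptions, not Megatron's exact implementation):

import torch

class SimpleDynamicScaler:
    """Illustrative dynamic loss scaler with hysteresis (not Megatron's exact class)."""

    def __init__(self, initial_scale=2.0**16, min_scale=1.0,
                 growth_factor=2.0, backoff_factor=0.5,
                 growth_interval=1000, hysteresis=2):
        self._scale = torch.tensor([initial_scale])
        self.min_scale = torch.tensor([min_scale])
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.growth_interval = growth_interval
        self.hysteresis = hysteresis
        self._growth_tracker = 0
        self._hysteresis_tracker = hysteresis

    def update(self, found_inf: bool):
        if found_inf:
            # Reset growth progress and spend one unit of hysteresis.
            self._growth_tracker = 0
            self._hysteresis_tracker -= 1
            # Only back off the scale once we are out of hysteresis count.
            if self._hysteresis_tracker <= 0:
                self._scale = torch.max(self._scale * self.backoff_factor,
                                        self.min_scale)
        else:
            # After enough consecutive overflow-free steps, grow the scale
            # and refill the hysteresis budget.
            self._growth_tracker += 1
            if self._growth_tracker == self.growth_interval:
                self._growth_tracker = 0
                self._hysteresis_tracker = self.hysteresis
                self._scale = self._scale * self.growth_factor

With hysteresis=2, a single overflow step leaves the scale untouched; two in a row halve it, and growth_interval clean steps double it again.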
@@ -375,7 +375,7 @@ class FP32Optimizer(MegatronOptimizer):
     @torch.no_grad()
     def step(self):
         """Clip gradients (if needed) and step the base optimizer.
-        Always return auccessful since there is no overflow."""
+        Always return successful since there is no overflow."""
         # Clip gradients.
         if self.clip_grad > 0.0:
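For context, the method whose docstring is fixed here simply clips gradients and steps the wrapped optimizer; since all state is FP32 there is no loss-scale overflow to report, so it can always signal success. A rough standalone sketch of that pattern (class and attribute names are assumptions, not the file's actual code):

import torch

class SimpleFP32Optimizer:
    """Illustrative FP32 optimizer wrapper (not Megatron's exact class)."""

    def __init__(self, optimizer, clip_grad=0.0):
        self.optimizer = optimizer
        self.clip_grad = clip_grad

    @torch.no_grad()
    def step(self):
        """Clip gradients (if needed) and step the base optimizer.
        Always return successful since there is no overflow."""
        # Clip gradients.
        if self.clip_grad > 0.0:
            params = [p for group in self.optimizer.param_groups
                      for p in group['params']]
            torch.nn.utils.clip_grad_norm_(params, self.clip_grad)
        # Step the wrapped optimizer; FP32 math cannot skip a step.
        self.optimizer.step()
        return True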
@@ -183,6 +183,9 @@ def get_model(model_provider_func):
     model = model_provider_func()
     # Set tensor model parallel attributes if not set.
+    # Only parameters that are already tensor model parallel have these
+    # attributes set for them. We should make sure the default attributes
+    # are set for all params so the optimizer can use them.
     for param in model.parameters():
         mpu.set_defaults_if_not_set_tensor_model_parallel_attributes(param)
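The added comment explains why the defaults matter: only parameters created by tensor-model-parallel layers get these attributes at construction time, but the optimizer later reads them for every parameter, so the remaining parameters need sensible defaults. A rough sketch of what "set defaults if not set" amounts to (attribute names and default values are assumptions based on Megatron's mpu module, not a verbatim copy):

# Illustrative only: default tensor-model-parallel attributes given to plain
# (non-parallel) parameters so downstream code can read them uniformly.
_MODEL_PARALLEL_ATTRIBUTE_DEFAULTS = {
    'tensor_model_parallel': False,  # parameter is not split across tensor-parallel ranks
    'partition_dim': -1,             # no dimension is partitioned
    'partition_stride': 1,           # contiguous (non-strided) partitioning
}

def set_defaults_if_not_set(param):
    """Attach default tensor-model-parallel attributes to a parameter that lacks them."""
    for name, value in _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS.items():
        if not hasattr(param, name):
            setattr(param, name, value)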