"packaging/vscode:/vscode.git/clone" did not exist on "df8d7767d0f47f7e6869b9d2f92a902c5cb6e03d"
Unverified commit d5241b70, authored by Wenwei Zhang, committed by GitHub

Allow the initial value to be set in dynamic mode (#692)

* allow the initial value to be set in dynamic mode

* add logging for grad norm back

* resolve comments
parent a0902e6d
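To make the change concrete, here is a minimal usage sketch (not part of the commit). It assumes Fp16OptimizerHook can be imported from mmcv.runner and only passes dict keys that are visible in the diff below (mode, init_scale):

from mmcv.runner import Fp16OptimizerHook

# Static loss scaling with a fixed scale of 512.
static_hook = Fp16OptimizerHook(loss_scale=512.)

# Dynamic loss scaling with the library defaults.
dynamic_hook = Fp16OptimizerHook(loss_scale='dynamic')

# Dynamic loss scaling that starts from a chosen initial scale, which is
# what this commit enables: the dict is forwarded to LossScaler as kwargs.
custom_hook = Fp16OptimizerHook(loss_scale=dict(mode='dynamic', init_scale=512.))

Before this change a dict fell through to the ValueError branch, so the initial scale could only be chosen in static mode.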
@@ -48,8 +48,12 @@ class Fp16OptimizerHook(OptimizerHook):
     Refer to https://arxiv.org/abs/1710.03740 for more details.
 
     Args:
-        loss_scale (float | str): Scale factor multiplied with loss. If
-            'dynamic' is specified, then dynamic loss scaling will be used.
+        loss_scale (float | str | dict): Scale factor multiplied with loss.
+            If loss_scale is a float, static loss scaling will be used with
+            the specified scale. If loss_scale is a string, it must be
+            'dynamic', then dynamic loss scaling will be used.
+            It can also be a dict containing arguments of LossScaler.
+            Defaults to 512.
     """
 
     def __init__(self,
@@ -66,8 +70,11 @@ class Fp16OptimizerHook(OptimizerHook):
             self.loss_scaler = LossScaler(mode='dynamic')
         elif isinstance(loss_scale, float):
             self.loss_scaler = LossScaler(init_scale=loss_scale, mode='static')
+        elif isinstance(loss_scale, dict):
+            self.loss_scaler = LossScaler(**loss_scale)
         else:
-            raise ValueError('loss_scale must be of type float or str')
+            raise ValueError('loss_scale must be of type float, dict, or '
+                             f'"dynamic", got {loss_scale}')
 
     def before_run(self, runner):
         """Preparing steps before Mixed Precision Training.
@@ -139,7 +146,11 @@ class Fp16OptimizerHook(OptimizerHook):
                 if param.grad is not None:
                     param.grad.div_(self.loss_scaler.loss_scale)
             if self.grad_clip is not None:
-                self.clip_grads(fp32_weights)
+                grad_norm = self.clip_grads(fp32_weights)
+                if grad_norm is not None:
+                    # Add grad norm to the logger
+                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
+                                             runner.outputs['num_samples'])
             # update fp32 params
             runner.optimizer.step()
             # copy fp32 params to the fp16 model
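A note on the grad_norm guard in the last hunk: clip_grads on the base OptimizerHook only returns a norm when at least one parameter actually has a gradient, which is why the logging is conditional. The sketch below mirrors that pattern; clip_grads_sketch and its arguments are illustrative, while torch.nn.utils.clip_grad_norm_ is the real API:

from torch.nn.utils import clip_grad_norm_

def clip_grads_sketch(params, grad_clip):
    # Only parameters that actually received gradients contribute to the norm.
    params = [p for p in params if p.requires_grad and p.grad is not None]
    if params:
        # clip_grad_norm_ clips in place and returns the total gradient norm;
        # the hook then pushes that value into runner.log_buffer as 'grad_norm'.
        return clip_grad_norm_(params, **grad_clip)
    # Nothing had a gradient, so there is nothing to clip and no norm to log,
    # hence the `if grad_norm is not None` check in the hook.
    return None

With grad_clip configured as a dict such as dict(max_norm=35, norm_type=2), the logged grad_norm then shows up in the training log alongside the loss values, weighted by the same num_samples count as the other outputs.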