Commit 2c9ed910 authored by Lawrence McAfee's avatar Lawrence McAfee
Browse files

some fixes.

parent b1781f31
@@ -100,8 +100,8 @@ def clip_grad_norm_fp32(parameters, grads_for_norm,
     total_norm = total_norm.item() ** (1.0 / norm_type)
     # >>>
-    from lutil import pax, tp, print_seq
-    print_seq("norm : grad %s, total %s." % (grad_norm.item(), total_norm))
+    # from lutil import pax, tp, print_seq
+    # print_seq("norm : grad %s, total %s." % (grad_norm.item(), total_norm))
     # <<<
     # Scale.
...
@@ -122,14 +122,6 @@ class MegatronOptimizer(ABC):
         if grad_not_none and is_not_shared and is_not_tp_duplicate:
             grads_for_norm.append(grad)
-        # >>>
-        # from lutil import pax
-        # pax(0, {
-        #     "params" : params,
-        #     "grads_for_norm" : grads_for_norm,
-        # })
-        # <<<
         return grads_for_norm
@@ -141,16 +133,6 @@ class MegatronOptimizer(ABC):
     def clip_grad_norm(self, clip_grad):
         params = self.get_parameters()
         grads_for_norm = self.get_main_grads_for_grad_norm()
-        # >>>
-        from lutil import print_seq
-        # print_seq("params %d, ngrads %d." % (len(params), len(grads_for_norm)))
-        # print_seq([
-        #     "grads_for_norm / %d = %s." % (i, str(tuple(g.shape)))
-        #     for i, g in enumerate(grads_for_norm)
-        # ])
-        print_seq("grads_for_norm = %s." % ", ".join(
-            str(tuple(g.shape)) for g in grads_for_norm))
-        # <<<
         return clip_grad_norm_fp32(
             params, grads_for_norm, clip_grad,
             model_parallel_group=self.get_model_parallel_group())
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment