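Some fixes: disable and remove leftover `lutil` debug instrumentation (debug import and `print_seq` calls) from the optimizer code.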

2c9ed910 · Lawrence McAfee · b1781f31 · 2c9ed910 · 2c9ed910
Commit 2c9ed910 authored Mar 28, 2022 by Lawrence McAfee
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 2 additions and 20 deletions

megatron/optimizer/clip_grads.py megatron/optimizer/clip_grads.py +2 -2

megatron/optimizer/optimizer.py megatron/optimizer/optimizer.py +0 -18

No files found.
--- a/megatron/optimizer/clip_grads.py
+++ b/megatron/optimizer/clip_grads.py
@@ -100,8 +100,8 @@ def clip_grad_norm_fp32(parameters, grads_for_norm,
        total_norm = total_norm.item() ** (1.0 / norm_type)
        # >>>
-        from lutil import pax, tp, print_seq
+        # from lutil import pax, tp, print_seq
-        print_seq("norm : grad %s, total %s." % (grad_norm.item(), total_norm))
+        # print_seq("norm : grad %s, total %s." % (grad_norm.item(), total_norm))
        # <<<
    # Scale.

--- a/megatron/optimizer/optimizer.py
+++ b/megatron/optimizer/optimizer.py
@@ -122,14 +122,6 @@ class MegatronOptimizer(ABC):
            if grad_not_none and is_not_shared and is_not_tp_duplicate:
                grads_for_norm.append(grad)
-        # >>>
-        # from lutil import pax
-        # pax(0, {
-        #     "params" : params,
-        #     "grads_for_norm" : grads_for_norm,
-        # })
-        # <<<
        return grads_for_norm
@@ -141,16 +133,6 @@ class MegatronOptimizer(ABC):
    def clip_grad_norm(self, clip_grad):
        params = self.get_parameters()
        grads_for_norm = self.get_main_grads_for_grad_norm()
-        # >>>
-        from lutil import print_seq
-        # print_seq("params %d, ngrads %d." % (len(params), len(grads_for_norm)))
-        # print_seq([
-        #     "grads_for_norm / %d = %s." % (i, str(tuple(g.shape)))
-        #     for i, g in enumerate(grads_for_norm)
-        # ])
-        print_seq("grads_for_norm = %s." % ", ".join(
-            str(tuple(g.shape)) for g in grads_for_norm))
-        # <<<
        return clip_grad_norm_fp32(
            params, grads_for_norm, clip_grad,
            model_parallel_group=self.get_model_parallel_group())