Commit 05042081 authored by Lawrence McAfee
Browse files

jan 11 alpha

parent 270d6412
...@@ -329,6 +329,7 @@ class CheckpointFunction(torch.autograd.Function): ...@@ -329,6 +329,7 @@ class CheckpointFunction(torch.autograd.Function):
# the chunk corresponding to the current rank. # the chunk corresponding to the current rank.
if distribute_checkpointed_activations: if distribute_checkpointed_activations:
# >>> # >>>
# raise Exception("distrib.")
# from lutil import data_leak_ctx # from lutil import data_leak_ctx
# with data_leak_ctx(args[0]): # with data_leak_ctx(args[0]):
# <<< # <<<
......
...@@ -115,7 +115,17 @@ def forward_step(forward_step_func, data_iterator, model, input_tensor, losses_r ...@@ -115,7 +115,17 @@ def forward_step(forward_step_func, data_iterator, model, input_tensor, losses_r
unwrap_output_tensor = True unwrap_output_tensor = True
unwrapped_model.set_input_tensor(input_tensor) unwrapped_model.set_input_tensor(input_tensor)
# >>>
# if input_tensor[0] is not None:
# from lutil import pax, tp
# pax({"input_tensor": tp(input_tensor)})
# <<<
output_tensor, loss_func = forward_step_func(data_iterator, model) output_tensor, loss_func = forward_step_func(data_iterator, model)
# >>>
# if input_tensor[0] is not None:
# from lutil import pax, tp
# pax({"input_tensor": tp(input_tensor)})
# <<<
if mpu.is_pipeline_last_stage(): if mpu.is_pipeline_last_stage():
output_tensor = loss_func(output_tensor) output_tensor = loss_func(output_tensor)
loss, loss_reduced = output_tensor loss, loss_reduced = output_tensor
...@@ -626,11 +636,29 @@ def forward_backward_pipelining_without_interleaving(forward_step_func, data_ite ...@@ -626,11 +636,29 @@ def forward_backward_pipelining_without_interleaving(forward_step_func, data_ite
# Run warmup forward passes. # Run warmup forward passes.
for i in range(num_warmup_microbatches): for i in range(num_warmup_microbatches):
input_tensor = recv_forward(recv_tensor_shapes, timers=timers) input_tensor = recv_forward(recv_tensor_shapes, timers=timers)
# >>>
# if input_tensor[0] is not None:
# from lutil import pax
# pax({"input_tensor": input_tensor})
# <<<
output_tensor = forward_step(forward_step_func, data_iterator, model, output_tensor = forward_step(forward_step_func, data_iterator, model,
input_tensor, losses_reduced) input_tensor, losses_reduced)
# >>>
# if True or input_tensor[0] is not None:
# from lutil import pax
# pax({"input_tensor": input_tensor})
# <<<
send_forward(output_tensor, send_tensor_shapes, timers=timers) send_forward(output_tensor, send_tensor_shapes, timers=timers)
if not forward_only: if not forward_only:
# >>>
# if input_tensor[0] is not None:
# from lutil import pax
# pax({"input_tensor": input_tensor})
# if output_tensor[0] is not None:
# from lutil import pax
# pax(0, {"output_tensor / 0": output_tensor[0]})
# <<<
assert_viewless_tensor(input_tensor) assert_viewless_tensor(input_tensor)
assert_viewless_tensor(output_tensor) assert_viewless_tensor(output_tensor)
input_tensors.append(input_tensor) input_tensors.append(input_tensor)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment