Commit d0925652 authored by Lawrence McAfee
Browse files

debugging fp32 training.

parent 9f64f5f4
...@@ -31,6 +31,7 @@ from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper ...@@ -31,6 +31,7 @@ from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper
from lutil import pax, tp, print_seq from lutil import pax, tp, print_seq
# <<< # <<<
class Range: class Range:
def __init__(self, start, end): def __init__(self, start, end):
......
...@@ -52,10 +52,6 @@ from megatron.schedules import get_forward_backward_func ...@@ -52,10 +52,6 @@ from megatron.schedules import get_forward_backward_func
from megatron.utils import report_memory from megatron.utils import report_memory
from megatron.model.vision.knn_monitor import compute_feature_bank from megatron.model.vision.knn_monitor import compute_feature_bank
# >>>
from lutil import pax, tp, print_seq
# <<<
def print_datetime(string): def print_datetime(string):
"""Note that this call will sync across all ranks.""" """Note that this call will sync across all ranks."""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment