"transformer_engine/pytorch/quantization.py" did not exist on "7530b768d8d6203dfb94fb202d74aa7a4e58377a"
Unverified commit 1d1d3233, authored by Paweł Gadziński, committed by GitHub

[PyTorch Debug] Fixed the empty tensor bug in statistics computation (#1843)



* fixed the bug
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* lint fix
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



* fix
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>

* test change
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



---------
Signed-off-by: Pawel Gadzinski <pgadzinski@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 964c2ed2
@@ -34,6 +34,6 @@ def test_debug_distributed(feature_dirs):
     test_path = TEST_ROOT / "run_distributed.py"
     test_cmd = LAUNCH_CMD + [str(test_path), f"--feature_dirs={feature_dirs[0]}"]
-    result = subprocess.run(test_cmd, env=os.environ, capture_output=True, check=False)
+    result = subprocess.run(test_cmd, env=os.environ, check=False, text=True)
     if result.returncode != 0:
-        raise AssertionError(result.stderr.decode())
+        raise AssertionError(f"torchrun exited with {result.returncode}")
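With capture_output removed, the torchrun child writes straight to the parent's console, so distributed-test output appears live instead of being buffered; the assertion then only reports the exit code, since result.stderr is no longer populated. A minimal sketch of the behavioral difference, independent of this test harness:

import subprocess
import sys

# capture_output=True buffers the child's stdout/stderr into the result;
# without text=True they come back as bytes and must be decoded manually.
captured = subprocess.run(
    [sys.executable, "-c", "print('hello from child')"],
    capture_output=True,
    check=False,
)
print(captured.stdout.decode())

# Without capture_output, the child inherits the parent's streams and its
# output shows up live; nothing is captured, so result.stdout is None.
streamed = subprocess.run(
    [sys.executable, "-c", "print('hello from child')"],
    check=False,
    text=True,
)
assert streamed.stdout is None
print(f"child exited with {streamed.returncode}")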
@@ -262,6 +262,18 @@ def _get_tensors():
     return x, weight
 
 
+LOGGING_CONFIG = """logging_config:
+  enabled: True
+  layers:
+    layer_types: [linear]
+  transformer_engine:
+    LogTensorStats:
+      enabled: True
+      tensors: [activation, gradient, weight, output, wgrad, dgrad]
+      stats: [min, max, mean, std, l1_norm, l2_norm, cur_amax, dynamic_range]
+"""
+
+
 DISABLE_FP8_CONFIG = Template(
     """disable_fp8_config:
   enabled: True
@@ -275,6 +287,24 @@ DISABLE_FP8_CONFIG = Template(
 )
 
 
+@create_config_file
+def run_logging_zero_numel_tensor(feature_dirs, **kwargs):
+    kwargs["config_file"].write(LOGGING_CONFIG)
+    kwargs["config_file"].flush()
+    _init_debug(kwargs["config_file"].name, kwargs["log_dir"], feature_dirs)
+    x, weight = _get_tensors()
+    x1 = x[:0, :]
+    model = _init_model(weight)
+    _ = _run_forward_backward(x1, model)
+    _ = _run_forward_backward(x, model)
+
+
+def test_logging_zero_numel_tensor(feature_dirs):
+    run_logging_zero_numel_tensor(feature_dirs)
+
+
 @pytest.mark.parametrize("fprop_fp8", all_boolean)
 @pytest.mark.parametrize("dgrad_fp8", all_boolean)
 @pytest.mark.parametrize("wgrad_fp8", all_boolean)
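For reference, x[:0, :] keeps every column but zero rows, producing a valid tensor with numel() == 0; running a forward/backward pass on it is exactly the case that used to crash the stats collection. A quick standalone illustration in plain PyTorch:

import torch

x = torch.randn(16, 64)
x_empty = x[:0, :]       # zero rows, all columns

print(x_empty.shape)     # torch.Size([0, 64]) -- a valid, empty tensor
print(x_empty.numel())   # 0

# Reductions over an empty tensor are where naive statistics code breaks:
print(x_empty.mean())    # tensor(nan)
try:
    x_empty.max()        # max() with no reduction dim raises on empty input
except RuntimeError as err:
    print("max() failed:", err)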
@@ -85,6 +85,13 @@ class _Buffer:
         if self.modified[0] and not self.reduce_within_microbatch:
             return
 
+        if (
+            tensor.numel() == 0
+            if hasattr(tensor, "numel")
+            else all((t is None or t.numel() == 0) for t in tensor.get_data_tensors())
+        ):
+            return
+
         # save stats for tensor to tmp buffer
         for stat_name in self.stats_to_compute:
             fn, _ = STATS[stat_name]
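The guard is a single conditional expression doing type dispatch: a plain torch.Tensor reports emptiness via numel(), while objects without numel() are treated as quantized wrappers that expose get_data_tensors(). A standalone sketch of the same logic; FakeQuantizedTensor is a hypothetical stand-in for illustration, not a Transformer Engine class:

import torch

class FakeQuantizedTensor:
    """Hypothetical stand-in for a quantized tensor whose storage is
    only reachable through get_data_tensors()."""

    def __init__(self, *tensors):
        self._tensors = tensors

    def get_data_tensors(self):
        return self._tensors

def is_empty(tensor):
    # Same shape as the committed guard: prefer numel() when available,
    # otherwise inspect every underlying data tensor.
    return (
        tensor.numel() == 0
        if hasattr(tensor, "numel")
        else all((t is None or t.numel() == 0) for t in tensor.get_data_tensors())
    )

print(is_empty(torch.zeros(0, 8)))                          # True
print(is_empty(torch.zeros(2, 8)))                          # False
print(is_empty(FakeQuantizedTensor(None, torch.empty(0))))  # True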
@@ -17,6 +17,8 @@ def _compute_dynamic_range_top(tensor):
     """Computes the log2 of the amax of the tensor"""
     tensor_abs = tensor.abs()
     tensor_abs = tensor_abs[tensor_abs != 0]
+    if tensor_abs.numel() == 0:
+        return torch.inf
     amax = tensor_abs.max().float()
     if not amax.all():
         amax = torch.tensor(1, device=tensor.device).to(torch.float)
@@ -125,7 +127,7 @@ STATS = {
         lambda buffers: min(_get(buffers, "dynamic_range_bottom")),
     ),
     "underflows_num": (
-        lambda x: (x._data == 0).sum(),
+        lambda x: (x.get_data_tensors()[0] == 0).sum(),
         lambda buffers: sum(_get(buffers, "underflows_num")),
     ),
     "std": (
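The dynamic-range stat is the gap between the log2 of the largest and smallest nonzero magnitudes, so once zeros are masked out an all-zero or zero-numel tensor leaves nothing to reduce over; returning torch.inf sidesteps the max()-on-empty error. A minimal sketch mirroring the patched top-end computation (the function name here is illustrative):

import torch

def dynamic_range_top(tensor):
    # log2 of the amax over nonzero magnitudes; inf when there are none.
    tensor_abs = tensor.abs()
    tensor_abs = tensor_abs[tensor_abs != 0]  # drop exact zeros
    if tensor_abs.numel() == 0:
        # Empty or all-zero input: max() would raise, so bail out early.
        return torch.inf
    amax = tensor_abs.max().float()
    return torch.log2(amax)

print(dynamic_range_top(torch.tensor([0.5, 4.0, 0.0])))  # tensor(2.), log2 of 4
print(dynamic_range_top(torch.zeros(3)))                 # inf
print(dynamic_range_top(torch.empty(0)))                 # inf

The underflows_num hunk above makes the matching accessor change: the private x._data attribute is replaced by the public get_data_tensors()[0], consistent with the buffer guard.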