"backend/apps/vscode:/vscode.git/clone" did not exist on "3a737af190873f5a611bb8dd178cf076372f83dd"
Unverified commit dbd0197e, authored by Jinhang Choi, committed by GitHub
Browse files

Reset cache logic of weight workspace for NVFP4TensorStorage (#2524)



reset weight ws cache for NVFP4TensorStorage
Signed-off-by: Jinhang Choi <jinhangc@nvidia.com>
parent eac8af6a
......@@ -45,6 +45,7 @@ from ..tensor.mxfp8_tensor import MXFP8Quantizer
from ..tensor.float8_blockwise_tensor import Float8BlockQuantizer
from ..tensor.storage.float8_tensor_storage import Float8TensorStorage
from ..tensor.storage.mxfp8_tensor_storage import MXFP8TensorStorage
from ..tensor.storage.nvfp4_tensor_storage import NVFP4TensorStorage
from ..utils import (
is_non_tn_fp8_gemm_supported,
torch_get_autocast_gpu_dtype,
......@@ -1388,6 +1389,11 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
reset_cache = True
elif quantizer.columnwise_usage and out._columnwise_data is None:
reset_cache = True
elif isinstance(out, NVFP4TensorStorage):
if quantizer.rowwise_usage and out._rowwise_data is None:
reset_cache = True
elif quantizer.columnwise_usage and out._columnwise_data is None:
reset_cache = True
if isinstance(out, DebugQuantizedTensor) != isinstance(quantizer, DebugQuantizer):
reset_cache = True
if reset_cache:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.