Merge branch 'develop_v2.10' into release_v2.10

aeceeac0 · wenjh · bd05b0dc · 284d3f6f · aeceeac0 · aeceeac0
Commit aeceeac0 authored Jan 23, 2026 by wenjh
Showing 2 changed files with 2 additions and 2 deletions

transformer_engine/common/__init__.py +1 -1

transformer_engine/pytorch/module/batched_linear.py +1 -1

No files found.
--- a/transformer_engine/common/__init__.py
+++ b/transformer_engine/common/__init__.py
@@ -392,10 +392,10 @@ if "NVTE_PROJECT_BUILDING" not in os.environ or bool(int(os.getenv("NVTE_RELEASE
        _CURAND_LIB_CTYPES = _load_curand()
        _CUBLAS_LIB_CTYPES = _load_nvidia_cuda_library("cublas")
        _CUDART_LIB_CTYPES = _load_nvidia_cuda_library("cuda_runtime")
-        _TE_LIB_CTYPES = _load_core_library()
        # Needed to find the correct headers for NVRTC kernels.
        if not os.getenv("NVTE_CUDA_INCLUDE_DIR") and _nvidia_cudart_include_dir():
            os.environ["NVTE_CUDA_INCLUDE_DIR"] = _nvidia_cudart_include_dir()
    except OSError:
        pass
+    _TE_LIB_CTYPES = _load_core_library()
--- a/transformer_engine/pytorch/module/batched_linear.py
+++ b/transformer_engine/pytorch/module/batched_linear.py
@@ -605,7 +605,7 @@ class BatchedLinear(TransformerEngineBaseModule):
            weight_tensors_fp8 = [None] * int(self.num_gemms)
-            from ..cpu_offload import CPUOffloadEnabled
+            from ..cpu_offload_v1 import CPUOffloadEnabled
            if torch.is_grad_enabled():
                linear_fn = _BatchLinear.apply