Commit 284d3f6f authored by maxiao3's avatar maxiao3 Committed by wenjh
Browse files

Fix issues related to L1cpp tests



1. `nvte_dgelu` could not be found
2. `fsdp_group` is not None
3. Change the `CPUOffloadEnabled` import from `cpu_offload` to `cpu_offload_v1`
Signed-off-by: maxiao3 <maxiao3@sugon.com>

See merge request dcutoolkit/deeplearing/TransformerEngine!74
parent 8fc9d8f1
......@@ -392,10 +392,10 @@ if "NVTE_PROJECT_BUILDING" not in os.environ or bool(int(os.getenv("NVTE_RELEASE
_CURAND_LIB_CTYPES = _load_curand()
_CUBLAS_LIB_CTYPES = _load_nvidia_cuda_library("cublas")
_CUDART_LIB_CTYPES = _load_nvidia_cuda_library("cuda_runtime")
_TE_LIB_CTYPES = _load_core_library()
# Needed to find the correct headers for NVRTC kernels.
if not os.getenv("NVTE_CUDA_INCLUDE_DIR") and _nvidia_cudart_include_dir():
os.environ["NVTE_CUDA_INCLUDE_DIR"] = _nvidia_cudart_include_dir()
except OSError:
pass
_TE_LIB_CTYPES = _load_core_library()
......@@ -605,7 +605,7 @@ class BatchedLinear(TransformerEngineBaseModule):
weight_tensors_fp8 = [None] * int(self.num_gemms)
from ..cpu_offload import CPUOffloadEnabled
from ..cpu_offload_v1 import CPUOffloadEnabled
if torch.is_grad_enabled():
linear_fn = _BatchLinear.apply
......
......@@ -117,7 +117,6 @@ class _Linear(torch.autograd.Function):
ub_bulk_dgrad: bool,
ub_bulk_wgrad: bool,
ub_name: str,
fine_grained_activation_offloading: bool,
fp8_output: bool, # pylint: disable=unused-argument
fsdp_group: Union[dist_group_type, None],
module: torch.nn.Module,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment