"vscode:/vscode.git/clone" did not exist on "deef9a3df883b76caeca7d35ccbe960a66af29de"
Commit 8b4bc8a3 authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-nccl_bugfix' into 'v0.11.0-dev'

[Bugfix] Fixing trying to import non-existent symbols from libnccl.so

See merge request dcutoolkit/deeplearing/vllm!355
parents e80886ff 227dd87c
...@@ -30,7 +30,9 @@ from typing import Any, Optional ...@@ -30,7 +30,9 @@ from typing import Any, Optional
import torch import torch
from torch.distributed import ReduceOp from torch.distributed import ReduceOp
from vllm import envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils import find_nccl_library from vllm.utils import find_nccl_library
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -226,21 +228,21 @@ class NCCLLibrary: ...@@ -226,21 +228,21 @@ class NCCLLibrary:
# ncclResult_t ncclCommWindowRegister( # ncclResult_t ncclCommWindowRegister(
# ncclComm_t comm, void* buff, size_t size, # ncclComm_t comm, void* buff, size_t size,
# ncclWindow_t* win, int winFlags); # ncclWindow_t* win, int winFlags);
#Function( Function(
# "ncclCommWindowRegister", "ncclCommWindowRegister",
# ncclResult_t, ncclResult_t,
# [ [
# ncclComm_t, ncclComm_t,
# buffer_type, buffer_type,
# ctypes.c_size_t, ctypes.c_size_t,
# ctypes.POINTER(ncclWindow_t), ctypes.POINTER(ncclWindow_t),
# ctypes.c_int, ctypes.c_int,
# ], ],
#), ),
# ncclResult_t ncclCommWindowDeregister( # ncclResult_t ncclCommWindowDeregister(
# ncclComm_t comm, ncclWindow_t win); # ncclComm_t comm, ncclWindow_t win);
#Function("ncclCommWindowDeregister", ncclResult_t, Function("ncclCommWindowDeregister", ncclResult_t,
# [ncclComm_t, ncclWindow_t]), [ncclComm_t, ncclWindow_t]),
] ]
# class attribute to store the mapping from the path to the library # class attribute to store the mapping from the path to the library
...@@ -275,10 +277,27 @@ class NCCLLibrary: ...@@ -275,10 +277,27 @@ class NCCLLibrary:
if so_file not in NCCLLibrary.path_to_dict_mapping: if so_file not in NCCLLibrary.path_to_dict_mapping:
_funcs: dict[str, Any] = {} _funcs: dict[str, Any] = {}
for func in NCCLLibrary.exported_functions: for func in NCCLLibrary.exported_functions:
f = getattr(self.lib, func.name) try:
f.restype = func.restype f = getattr(self.lib, func.name)
f.argtypes = func.argtypes f.restype = func.restype
_funcs[func.name] = f f.argtypes = func.argtypes
_funcs[func.name] = f
except AttributeError:
if func.name in [
"ncclCommWindowRegister",
"ncclCommWindowDeregister"
]:
if envs.VLLM_USE_NCCL_SYMM_MEM:
logger.warning_once(
"The symbol %s is not found in the NCCL "
"library %s. To enable VLLM_USE_NCCL_SYMM_MEM "
" please update your NCCL version to >= "
"2.27.03.", func.name, so_file)
if current_platform.is_rocm():
# Having an exception here on ROCm platform is
# not allowed during graph capturing
continue
raise
NCCLLibrary.path_to_dict_mapping[so_file] = _funcs NCCLLibrary.path_to_dict_mapping[so_file] = _funcs
self._funcs = NCCLLibrary.path_to_dict_mapping[so_file] self._funcs = NCCLLibrary.path_to_dict_mapping[so_file]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment