Unverified Commit 042c3419 authored by Lu Fang's avatar Lu Fang Committed by GitHub
Browse files

Introduce VLLM_CUDART_SO_PATH to allow users to specify the .so path (#12998)


Signed-off-by: Lu Fang <lufang@fb.com>
parent 82cabf53
...@@ -5,12 +5,14 @@ convenient for use when we just need to call a few functions. ...@@ -5,12 +5,14 @@ convenient for use when we just need to call a few functions.
""" """
import ctypes import ctypes
import glob
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
# this line makes it possible to directly load `libcudart.so` using `ctypes` # this line makes it possible to directly load `libcudart.so` using `ctypes`
import torch # noqa import torch # noqa
import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -60,6 +62,29 @@ def find_loaded_library(lib_name) -> Optional[str]: ...@@ -60,6 +62,29 @@ def find_loaded_library(lib_name) -> Optional[str]:
return path return path
def get_cudart_lib_path_from_env() -> Optional[str]:
    """
    Resolve the CUDA runtime library path from VLLM_CUDART_SO_PATH.

    On some systems find_loaded_library() may not work, so users are allowed
    to specify the path through the environment variable
    VLLM_CUDART_SO_PATH. The value is passed to glob.glob(), so it may be
    either a plain file path or a glob pattern.

    Returns:
        The first path produced by glob.glob() for the configured value, or
        None when the variable is unset or nothing matches it.
    """
    cudart_so_env = envs.VLLM_CUDART_SO_PATH
    if cudart_so_env is None:
        return None

    file_paths = glob.glob(cudart_so_env)
    if not file_paths:
        # The user asked for a specific location but nothing is there; say so
        # explicitly, otherwise the only symptom is the caller's failure to
        # find libcudart.
        logger.warning(
            "VLLM_CUDART_SO_PATH=%s did not match any existing file",
            cudart_so_env,
        )
        return None

    # NOTE: the trailing space after "env var" matters; the two literals are
    # joined by implicit string concatenation.
    logger.info(
        "Found cudart library at %s through env var "
        "VLLM_CUDART_SO_PATH=%s",
        file_paths[0],
        cudart_so_env,
    )
    return file_paths[0]
class CudaRTLibrary: class CudaRTLibrary:
exported_functions = [ exported_functions = [
# ​cudaError_t cudaSetDevice ( int device ) # ​cudaError_t cudaSetDevice ( int device )
...@@ -105,8 +130,13 @@ class CudaRTLibrary: ...@@ -105,8 +130,13 @@ class CudaRTLibrary:
def __init__(self, so_file: Optional[str] = None): def __init__(self, so_file: Optional[str] = None):
if so_file is None: if so_file is None:
so_file = find_loaded_library("libcudart") so_file = find_loaded_library("libcudart")
if so_file is None:
so_file = get_cudart_lib_path_from_env()
assert so_file is not None, \ assert so_file is not None, \
"libcudart is not loaded in the current process" (
"libcudart is not loaded in the current process, "
"try setting VLLM_CUDART_SO_PATH"
)
if so_file not in CudaRTLibrary.path_to_library_cache: if so_file not in CudaRTLibrary.path_to_library_cache:
lib = ctypes.CDLL(so_file) lib = ctypes.CDLL(so_file)
CudaRTLibrary.path_to_library_cache[so_file] = lib CudaRTLibrary.path_to_library_cache[so_file] = lib
......
...@@ -87,6 +87,7 @@ if TYPE_CHECKING: ...@@ -87,6 +87,7 @@ if TYPE_CHECKING:
VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False
VLLM_RAY_PER_WORKER_GPUS: float = 1.0 VLLM_RAY_PER_WORKER_GPUS: float = 1.0
VLLM_RAY_BUNDLE_INDICES: str = "" VLLM_RAY_BUNDLE_INDICES: str = ""
VLLM_CUDART_SO_PATH: Optional[str] = None
def get_default_cache_root(): def get_default_cache_root():
...@@ -572,6 +573,11 @@ environment_variables: Dict[str, Callable[[], Any]] = { ...@@ -572,6 +573,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# models the alignment is already naturally aligned to 256 bytes. # models the alignment is already naturally aligned to 256 bytes.
"VLLM_CUDA_MEM_ALIGN_KV_CACHE": "VLLM_CUDA_MEM_ALIGN_KV_CACHE":
lambda: bool(int(os.getenv("VLLM_CUDA_MEM_ALIGN_KV_CACHE", "1"))), lambda: bool(int(os.getenv("VLLM_CUDA_MEM_ALIGN_KV_CACHE", "1"))),
# On some systems, find_loaded_library() may not work, so we allow users to
# specify the path through the environment variable VLLM_CUDART_SO_PATH.
"VLLM_CUDART_SO_PATH":
lambda: os.getenv("VLLM_CUDART_SO_PATH", None),
} }
# end-env-vars-definition # end-env-vars-definition
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment