Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
042c3419
Unverified
Commit
042c3419
authored
Feb 12, 2025
by
Lu Fang
Committed by
GitHub
Feb 12, 2025
Browse files
Introduce VLLM_CUDART_SO_PATH to allow users specify the .so path (#12998)
Signed-off-by:
Lu Fang
<
lufang@fb.com
>
parent
82cabf53
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
37 additions
and
1 deletion
+37
-1
vllm/distributed/device_communicators/cuda_wrapper.py
vllm/distributed/device_communicators/cuda_wrapper.py
+31
-1
vllm/envs.py
vllm/envs.py
+6
-0
No files found.
vllm/distributed/device_communicators/cuda_wrapper.py
View file @
042c3419
...
@@ -5,12 +5,14 @@ convenient for use when we just need to call a few functions.
...
@@ -5,12 +5,14 @@ convenient for use when we just need to call a few functions.
"""
"""
import
ctypes
import
ctypes
import
glob
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Any
,
Dict
,
List
,
Optional
from
typing
import
Any
,
Dict
,
List
,
Optional
# this line makes it possible to directly load `libcudart.so` using `ctypes`
# this line makes it possible to directly load `libcudart.so` using `ctypes`
import
torch
# noqa
import
torch
# noqa
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -60,6 +62,29 @@ def find_loaded_library(lib_name) -> Optional[str]:
...
@@ -60,6 +62,29 @@ def find_loaded_library(lib_name) -> Optional[str]:
return
path
return
path
def get_cudart_lib_path_from_env() -> Optional[str]:
    """
    Return the cudart library path configured via VLLM_CUDART_SO_PATH.

    On some systems, find_loaded_library() may not work, so users can specify
    the path through the environment variable VLLM_CUDART_SO_PATH. The value
    is passed through glob.glob(), so it may contain shell-style wildcards.

    Returns:
        The first path produced by glob.glob() for the configured value, or
        None when envs.VLLM_CUDART_SO_PATH is None or the pattern matches
        nothing.
    """
    cudart_so_env = envs.VLLM_CUDART_SO_PATH
    if cudart_so_env is None:
        return None

    file_paths = glob.glob(cudart_so_env)
    if not file_paths:
        return None

    # The trailing space after "env var" matters: the two adjacent literals
    # are implicitly concatenated, and without it the message would read
    # "env varVLLM_CUDART_SO_PATH=...".
    logger.info(
        "Found cudart library at %s through env var "
        "VLLM_CUDART_SO_PATH=%s",
        file_paths[0],
        cudart_so_env,
    )
    return file_paths[0]
class
CudaRTLibrary
:
class
CudaRTLibrary
:
exported_functions
=
[
exported_functions
=
[
# cudaError_t cudaSetDevice ( int device )
# cudaError_t cudaSetDevice ( int device )
...
@@ -105,8 +130,13 @@ class CudaRTLibrary:
...
@@ -105,8 +130,13 @@ class CudaRTLibrary:
def
__init__
(
self
,
so_file
:
Optional
[
str
]
=
None
):
def
__init__
(
self
,
so_file
:
Optional
[
str
]
=
None
):
if
so_file
is
None
:
if
so_file
is
None
:
so_file
=
find_loaded_library
(
"libcudart"
)
so_file
=
find_loaded_library
(
"libcudart"
)
if
so_file
is
None
:
so_file
=
get_cudart_lib_path_from_env
()
assert
so_file
is
not
None
,
\
assert
so_file
is
not
None
,
\
"libcudart is not loaded in the current process"
(
"libcudart is not loaded in the current process, "
"try setting VLLM_CUDART_SO_PATH"
)
if
so_file
not
in
CudaRTLibrary
.
path_to_library_cache
:
if
so_file
not
in
CudaRTLibrary
.
path_to_library_cache
:
lib
=
ctypes
.
CDLL
(
so_file
)
lib
=
ctypes
.
CDLL
(
so_file
)
CudaRTLibrary
.
path_to_library_cache
[
so_file
]
=
lib
CudaRTLibrary
.
path_to_library_cache
[
so_file
]
=
lib
...
...
vllm/envs.py
View file @
042c3419
...
@@ -87,6 +87,7 @@ if TYPE_CHECKING:
...
@@ -87,6 +87,7 @@ if TYPE_CHECKING:
VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON
:
bool
=
False
VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON
:
bool
=
False
VLLM_RAY_PER_WORKER_GPUS
:
float
=
1.0
VLLM_RAY_PER_WORKER_GPUS
:
float
=
1.0
VLLM_RAY_BUNDLE_INDICES
:
str
=
""
VLLM_RAY_BUNDLE_INDICES
:
str
=
""
VLLM_CUDART_SO_PATH
:
Optional
[
str
]
=
None
def
get_default_cache_root
():
def
get_default_cache_root
():
...
@@ -572,6 +573,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
...
@@ -572,6 +573,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# models the alignment is already naturally aligned to 256 bytes.
# models the alignment is already naturally aligned to 256 bytes.
"VLLM_CUDA_MEM_ALIGN_KV_CACHE"
:
"VLLM_CUDA_MEM_ALIGN_KV_CACHE"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_CUDA_MEM_ALIGN_KV_CACHE"
,
"1"
))),
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_CUDA_MEM_ALIGN_KV_CACHE"
,
"1"
))),
# On some systems, find_loaded_library() may not work, so we allow users to
# specify the path through the environment variable VLLM_CUDART_SO_PATH.
"VLLM_CUDART_SO_PATH"
:
lambda
:
os
.
getenv
(
"VLLM_CUDART_SO_PATH"
,
None
),
}
}
# end-env-vars-definition
# end-env-vars-definition
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment