Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
344bf7cd
Unverified
Commit
344bf7cd
authored
May 03, 2024
by
youkaichao
Committed by
GitHub
May 03, 2024
Browse files
[Misc] add installation time env vars (#4574)
parent
ab502751
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
81 additions
and
18 deletions
+81
-18
setup.py
setup.py
+23
-10
vllm/envs.py
vllm/envs.py
+58
-8
No files found.
setup.py
View file @
344bf7cd
import
importlib.util
import
io
import
logging
import
os
...
...
@@ -13,10 +14,23 @@ from setuptools import Extension, find_packages, setup
from
setuptools.command.build_ext
import
build_ext
from
torch.utils.cpp_extension
import
CUDA_HOME
def load_module_from_path(module_name, path):
    """Load the Python source file at ``path`` as a module.

    The module is registered in ``sys.modules`` under ``module_name``
    before it is executed, and the executed module object is returned.

    Args:
        module_name: Name under which the module is registered in
            ``sys.modules``.
        path: Filesystem location of the source file to execute.

    Returns:
        The loaded module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``path`` (for example, an unrecognised file suffix).
        Exception: Whatever the module's own top-level code raises; in
            that case the partially initialised module is removed from
            ``sys.modules`` again before the exception propagates.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    # spec_from_file_location returns None when no loader matches the path;
    # fail with a clear message instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot load module {module_name!r} from {path!r}: "
            "no import spec/loader is available for this path",
            name=module_name,
            path=str(path),
        )

    module = importlib.util.module_from_spec(spec)
    # Register first so code executed during loading can look the module
    # up by name in sys.modules.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind on failure.
        if sys.modules.get(module_name) is module:
            del sys.modules[module_name]
        raise
    return module
ROOT_DIR
=
os
.
path
.
dirname
(
__file__
)
logger
=
logging
.
getLogger
(
__name__
)
# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
VLLM_TARGET_DEVICE
=
os
.
getenv
(
"VLLM_TARGET_DEVICE"
,
"cuda"
)
# cannot import envs directly because it depends on vllm,
# which is not installed yet
envs
=
load_module_from_path
(
'envs'
,
os
.
path
.
join
(
ROOT_DIR
,
'vllm'
,
'envs.py'
))
VLLM_TARGET_DEVICE
=
envs
.
VLLM_TARGET_DEVICE
# vLLM only supports Linux platform
assert
sys
.
platform
.
startswith
(
...
...
@@ -60,7 +74,7 @@ class cmake_build_ext(build_ext):
def
compute_num_jobs
(
self
):
# `num_jobs` is either the value of the MAX_JOBS environment variable
# (if defined) or the number of CPUs available.
num_jobs
=
os
.
environ
.
get
(
"MAX_JOBS"
,
None
)
num_jobs
=
envs
.
MAX_JOBS
if
num_jobs
is
not
None
:
num_jobs
=
int
(
num_jobs
)
logger
.
info
(
"Using MAX_JOBS=%d as the number of jobs."
,
num_jobs
)
...
...
@@ -78,7 +92,7 @@ class cmake_build_ext(build_ext):
# environment variable (if defined) or 1.
# when it is set, we reduce `num_jobs` to avoid
# overloading the system.
nvcc_threads
=
os
.
get
env
(
"
NVCC_THREADS
"
,
None
)
nvcc_threads
=
env
s
.
NVCC_THREADS
if
nvcc_threads
is
not
None
:
nvcc_threads
=
int
(
nvcc_threads
)
logger
.
info
(
...
...
@@ -104,7 +118,7 @@ class cmake_build_ext(build_ext):
# Select the build type.
# Note: optimization level + debug info are set by the build type
default_cfg
=
"Debug"
if
self
.
debug
else
"RelWithDebInfo"
cfg
=
os
.
get
env
(
"
CMAKE_BUILD_TYPE
"
,
default_cfg
)
cfg
=
env
s
.
CMAKE_BUILD_TYPE
or
default_cfg
# where .so files will be written, should be the same for all extensions
# that use the same CMakeLists.txt.
...
...
@@ -118,7 +132,7 @@ class cmake_build_ext(build_ext):
'-DVLLM_TARGET_DEVICE={}'
.
format
(
VLLM_TARGET_DEVICE
),
]
verbose
=
bool
(
int
(
os
.
get
env
(
'
VERBOSE
'
,
'0'
)))
verbose
=
env
s
.
VERBOSE
if
verbose
:
cmake_args
+=
[
'-DCMAKE_VERBOSE_MAKEFILE=ON'
]
...
...
@@ -205,8 +219,7 @@ def _is_neuron() -> bool:
subprocess
.
run
([
"neuron-ls"
],
capture_output
=
True
,
check
=
True
)
except
(
FileNotFoundError
,
PermissionError
,
subprocess
.
CalledProcessError
):
torch_neuronx_installed
=
False
return
torch_neuronx_installed
or
os
.
environ
.
get
(
"VLLM_BUILD_WITH_NEURON"
,
False
)
return
torch_neuronx_installed
or
envs
.
VLLM_BUILD_WITH_NEURON
def
_is_cpu
()
->
bool
:
...
...
@@ -214,7 +227,7 @@ def _is_cpu() -> bool:
def
_install_punica
()
->
bool
:
return
bool
(
int
(
os
.
get
env
(
"
VLLM_INSTALL_PUNICA_KERNELS
"
,
"0"
)))
return
env
s
.
VLLM_INSTALL_PUNICA_KERNELS
def
get_hipcc_rocm_version
():
...
...
@@ -377,7 +390,7 @@ if not _is_neuron():
package_data
=
{
"vllm"
:
[
"py.typed"
,
"model_executor/layers/fused_moe/configs/*.json"
]
}
if
os
.
environ
.
get
(
"
VLLM_USE_PRECOMPILED
"
)
:
if
envs
.
VLLM_USE_PRECOMPILED
:
ext_modules
=
[]
package_data
[
"vllm"
].
append
(
"*.so"
)
...
...
vllm/envs.py
View file @
344bf7cd
...
...
@@ -27,6 +27,14 @@ if TYPE_CHECKING:
VLLM_CPU_KVCACHE_SPACE
:
int
=
0
VLLM_USE_RAY_COMPILED_DAG
:
bool
=
False
VLLM_WORKER_MULTIPROC_METHOD
:
str
=
"spawn"
VLLM_TARGET_DEVICE
:
str
=
"cuda"
MAX_JOBS
:
Optional
[
str
]
=
None
NVCC_THREADS
:
Optional
[
str
]
=
None
VLLM_BUILD_WITH_NEURON
:
bool
=
False
VLLM_USE_PRECOMPILED
:
bool
=
False
VLLM_INSTALL_PUNICA_KERNELS
:
bool
=
False
CMAKE_BUILD_TYPE
:
Optional
[
str
]
=
None
VERBOSE
:
bool
=
False
# The begin-* and end-* markers here are used by the documentation generator
# to extract the used env vars.
...
...
@@ -34,6 +42,56 @@ if TYPE_CHECKING:
# begin-env-vars-definition
environment_variables
:
Dict
[
str
,
Callable
[[],
Any
]]
=
{
# ================== Installation Time Env Vars ==================
# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
"VLLM_TARGET_DEVICE"
:
lambda
:
os
.
getenv
(
"VLLM_TARGET_DEVICE"
,
"cuda"
),
# Maximum number of compilation jobs to run in parallel.
# By default this is the number of CPUs
"MAX_JOBS"
:
lambda
:
os
.
getenv
(
"MAX_JOBS"
,
None
),
# Number of threads to use for nvcc
# By default this is 1.
# If set, `MAX_JOBS` will be reduced to avoid oversubscribing the CPU.
"NVCC_THREADS"
:
lambda
:
os
.
getenv
(
"NVCC_THREADS"
,
None
),
# If set, vllm will build with Neuron support
"VLLM_BUILD_WITH_NEURON"
:
lambda
:
bool
(
os
.
environ
.
get
(
"VLLM_BUILD_WITH_NEURON"
,
False
)),
# If set, vllm will use precompiled binaries (*.so)
"VLLM_USE_PRECOMPILED"
:
lambda
:
bool
(
os
.
environ
.
get
(
"VLLM_USE_PRECOMPILED"
)),
# If set, vllm will install Punica kernels
"VLLM_INSTALL_PUNICA_KERNELS"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_INSTALL_PUNICA_KERNELS"
,
"0"
))),
# CMake build type
# If not set, defaults to "Debug" or "RelWithDebInfo"
# Available options: "Debug", "Release", "RelWithDebInfo"
"CMAKE_BUILD_TYPE"
:
lambda
:
os
.
getenv
(
"CMAKE_BUILD_TYPE"
),
# If set, vllm will print verbose logs during installation
"VERBOSE"
:
lambda
:
bool
(
int
(
os
.
getenv
(
'VERBOSE'
,
'0'
))),
# Root directory for VLLM configuration files
# Note that this not only affects how vllm finds its configuration files
# during runtime, but also affects how vllm installs its configuration
# files during **installation**.
"VLLM_CONFIG_ROOT"
:
lambda
:
os
.
environ
.
get
(
"VLLM_CONFIG_ROOT"
,
None
)
or
os
.
getenv
(
"XDG_CONFIG_HOME"
,
None
)
or
os
.
path
.
expanduser
(
"~/.config"
),
# ================== Runtime Env Vars ==================
# used in distributed environment to determine the master address
'VLLM_HOST_IP'
:
lambda
:
os
.
getenv
(
'VLLM_HOST_IP'
,
""
)
or
os
.
getenv
(
"HOST_IP"
,
""
),
...
...
@@ -93,14 +151,6 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"S3_ENDPOINT_URL"
:
lambda
:
os
.
environ
.
get
(
"S3_ENDPOINT_URL"
,
None
),
# Root directory for VLLM configuration files
# Note that this not only affects how vllm finds its configuration files
# during runtime, but also affects how vllm installs its configuration
# files during **installation**.
"VLLM_CONFIG_ROOT"
:
lambda
:
os
.
environ
.
get
(
"VLLM_CONFIG_ROOT"
,
None
)
or
os
.
getenv
(
"XDG_CONFIG_HOME"
,
None
)
or
os
.
path
.
expanduser
(
"~/.config"
),
# Usage stats collection
"VLLM_USAGE_STATS_SERVER"
:
lambda
:
os
.
environ
.
get
(
"VLLM_USAGE_STATS_SERVER"
,
"https://stats.vllm.ai"
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment