Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
390b495f
Unverified
Commit
390b495f
authored
Jan 26, 2024
by
Philipp Moritz
Committed by
GitHub
Jan 26, 2024
Browse files
Don't build punica kernels by default (#2605)
parent
3a0e1fc0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
4 deletions
+11
-4
.github/workflows/scripts/build.sh
.github/workflows/scripts/build.sh
+2
-0
Dockerfile
Dockerfile
+2
-0
setup.py
setup.py
+1
-1
vllm/lora/punica.py
vllm/lora/punica.py
+6
-3
No files found.
.github/workflows/scripts/build.sh
View file @
390b495f
...
@@ -13,6 +13,8 @@ $python_executable -m pip install -r requirements.txt
...
@@ -13,6 +13,8 @@ $python_executable -m pip install -r requirements.txt
# Limit the number of parallel jobs to avoid OOM
# Limit the number of parallel jobs to avoid OOM
export
MAX_JOBS
=
1
export
MAX_JOBS
=
1
# Make sure punica is built for the release (for LoRA)
export
VLLM_INSTALL_PUNICA_KERNELS
=
1
# Build
# Build
$python_executable
setup.py bdist_wheel
--dist-dir
=
dist
$python_executable
setup.py bdist_wheel
--dist-dir
=
dist
Dockerfile
View file @
390b495f
...
@@ -45,6 +45,8 @@ ENV MAX_JOBS=${max_jobs}
...
@@ -45,6 +45,8 @@ ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
# number of threads used by nvcc
ARG
nvcc_threads=8
ARG
nvcc_threads=8
ENV
NVCC_THREADS=$nvcc_threads
ENV
NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV
VLLM_INSTALL_PUNICA_KERNELS=1
RUN
python3 setup.py build_ext
--inplace
RUN
python3 setup.py build_ext
--inplace
#################### EXTENSION Build IMAGE ####################
#################### EXTENSION Build IMAGE ####################
...
...
setup.py
View file @
390b495f
...
@@ -265,7 +265,7 @@ if _is_cuda():
...
@@ -265,7 +265,7 @@ if _is_cuda():
with
contextlib
.
suppress
(
ValueError
):
with
contextlib
.
suppress
(
ValueError
):
torch_cpp_ext
.
COMMON_NVCC_FLAGS
.
remove
(
flag
)
torch_cpp_ext
.
COMMON_NVCC_FLAGS
.
remove
(
flag
)
install_punica
=
bool
(
int
(
os
.
getenv
(
"VLLM_INSTALL_PUNICA_KERNELS"
,
"
1
"
)))
install_punica
=
bool
(
int
(
os
.
getenv
(
"VLLM_INSTALL_PUNICA_KERNELS"
,
"
0
"
)))
device_count
=
torch
.
cuda
.
device_count
()
device_count
=
torch
.
cuda
.
device_count
()
for
i
in
range
(
device_count
):
for
i
in
range
(
device_count
):
major
,
minor
=
torch
.
cuda
.
get_device_capability
(
i
)
major
,
minor
=
torch
.
cuda
.
get_device_capability
(
i
)
...
...
vllm/lora/punica.py
View file @
390b495f
...
@@ -157,10 +157,13 @@ else:
...
@@ -157,10 +157,13 @@ else:
**
kwargs
# pylint: disable=unused-argument
**
kwargs
# pylint: disable=unused-argument
):
):
if
torch
.
cuda
.
get_device_capability
()
<
(
8
,
0
):
if
torch
.
cuda
.
get_device_capability
()
<
(
8
,
0
):
raise
ImportError
(
raise
ImportError
(
"punica LoRA kernels require compute "
"LoRA kernels require compute
capability>=8.0"
)
from
import_exc
"
capability>=8.0"
)
from
import_exc
else
:
else
:
raise
import_exc
raise
ImportError
(
"punica LoRA kernels could not be imported. If you built vLLM "
"from source, make sure VLLM_INSTALL_PUNICA_KERNELS=1 env var "
"was set."
)
from
import_exc
bgmv
=
_raise_exc
bgmv
=
_raise_exc
add_lora
=
_raise_exc
add_lora
=
_raise_exc
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment