Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a1873db2
Unverified
Commit
a1873db2
authored
Jul 29, 2025
by
Doug Smith
Committed by
GitHub
Jul 29, 2025
Browse files
docker: docker-aware precompiled wheel support (#21127)
Signed-off-by:
dougbtv
<
dosmith@redhat.com
>
parent
a33ea28b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
68 additions
and
27 deletions
+68
-27
docker/Dockerfile
docker/Dockerfile
+16
-10
setup.py
setup.py
+43
-15
vllm/envs.py
vllm/envs.py
+9
-2
No files found.
docker/Dockerfile
View file @
a1873db2
...
@@ -209,16 +209,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
...
@@ -209,16 +209,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
ARG
SCCACHE_S3_NO_CREDENTIALS=0
ARG
SCCACHE_S3_NO_CREDENTIALS=0
# Flag to control whether to use pre-built vLLM wheels
# Flag to control whether to use pre-built vLLM wheels
ARG
VLLM_USE_PRECOMPILED
ARG
VLLM_USE_PRECOMPILED=""
# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
ENV
VLLM_USE_PRECOMPILED=""
RUN if
[
"
${
VLLM_USE_PRECOMPILED
}
"
=
"1"
]
;
then
\
export
VLLM_USE_PRECOMPILED
=
1
&&
\
echo
"Using precompiled wheels"
;
\
else
\
unset
VLLM_USE_PRECOMPILED
&&
\
echo
"Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"
;
\
fi
# if USE_SCCACHE is set, use sccache to speed up compilation
# if USE_SCCACHE is set, use sccache to speed up compilation
RUN
--mount
=
type
=
cache,target
=
/root/.cache/uv
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/uv
\
...
@@ -235,6 +226,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
...
@@ -235,6 +226,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
&&
export
SCCACHE_S3_NO_CREDENTIALS
=
${
SCCACHE_S3_NO_CREDENTIALS
}
\
&&
export
SCCACHE_S3_NO_CREDENTIALS
=
${
SCCACHE_S3_NO_CREDENTIALS
}
\
&&
export
SCCACHE_IDLE_TIMEOUT
=
0
\
&&
export
SCCACHE_IDLE_TIMEOUT
=
0
\
&&
export
CMAKE_BUILD_TYPE
=
Release
\
&&
export
CMAKE_BUILD_TYPE
=
Release
\
&&
export
VLLM_USE_PRECOMPILED
=
"
${
VLLM_USE_PRECOMPILED
}
"
\
&&
export
VLLM_DOCKER_BUILD_CONTEXT
=
1
\
&&
sccache
--show-stats
\
&&
sccache
--show-stats
\
&&
python3 setup.py bdist_wheel
--dist-dir
=
dist
--py-limited-api
=
cp38
\
&&
python3 setup.py bdist_wheel
--dist-dir
=
dist
--py-limited-api
=
cp38
\
&&
sccache
--show-stats
;
\
&&
sccache
--show-stats
;
\
...
@@ -248,9 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
...
@@ -248,9 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
# Clean any existing CMake artifacts
# Clean any existing CMake artifacts
rm -rf .deps && \
rm -rf .deps && \
mkdir -p .deps && \
mkdir -p .deps && \
export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
fi
fi
# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
RUN if
[
"
$VLLM_USE_PRECOMPILED
"
=
"1"
]
;
then
\
echo
"Cleaning up extra wheels in dist/..."
&&
\
# Identify the most recent manylinux1_x86_64 wheel
KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
if [ -n "$KEEP_WHEEL" ]; then \
echo "Keeping wheel: $KEEP_WHEEL"; \
find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
fi; \
fi
# Check the size of the wheel if RUN_WHEEL_CHECK is true
# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY
.buildkite/check-wheel-size.py check-wheel-size.py
COPY
.buildkite/check-wheel-size.py check-wheel-size.py
# sync the default value with .buildkite/check-wheel-size.py
# sync the default value with .buildkite/check-wheel-size.py
...
...
setup.py
View file @
a1873db2
...
@@ -7,6 +7,7 @@ import json
...
@@ -7,6 +7,7 @@ import json
import
logging
import
logging
import
os
import
os
import
re
import
re
import
shutil
import
subprocess
import
subprocess
import
sys
import
sys
from
pathlib
import
Path
from
pathlib
import
Path
...
@@ -297,6 +298,10 @@ class repackage_wheel(build_ext):
...
@@ -297,6 +298,10 @@ class repackage_wheel(build_ext):
]).
decode
(
"utf-8"
)
]).
decode
(
"utf-8"
)
upstream_main_commit
=
json
.
loads
(
resp_json
)[
"sha"
]
upstream_main_commit
=
json
.
loads
(
resp_json
)[
"sha"
]
# In Docker build context, .git may be immutable or missing.
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
return
upstream_main_commit
# Check if the upstream_main_commit exists in the local repo
# Check if the upstream_main_commit exists in the local repo
try
:
try
:
subprocess
.
check_output
(
subprocess
.
check_output
(
...
@@ -357,19 +362,48 @@ class repackage_wheel(build_ext):
...
@@ -357,19 +362,48 @@ class repackage_wheel(build_ext):
# create a temporary directory to store the wheel
# create a temporary directory to store the wheel
temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"vllm-wheels"
)
temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"vllm-wheels"
)
wheel_path
=
os
.
path
.
join
(
temp_dir
,
wheel_filename
)
wheel_path
=
os
.
path
.
join
(
temp_dir
,
wheel_filename
)
print
(
f
"Downloading wheel from
{
wheel_location
}
to
{
wheel_path
}
"
)
print
(
f
"Downloading wheel from
{
wheel_location
}
to
{
wheel_path
}
"
)
from
urllib.request
import
urlretrieve
from
urllib.request
import
urlretrieve
try
:
try
:
urlretrieve
(
wheel_location
,
filename
=
wheel_path
)
urlretrieve
(
wheel_location
,
filename
=
wheel_path
)
except
Exception
as
e
:
except
Exception
as
e
:
from
setuptools.errors
import
SetupError
from
setuptools.errors
import
SetupError
raise
SetupError
(
raise
SetupError
(
f
"Failed to get vLLM wheel from
{
wheel_location
}
"
)
from
e
f
"Failed to get vLLM wheel from
{
wheel_location
}
"
)
from
e
# During a docker build: determine correct filename, copy wheel.
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
dist_dir
=
"/workspace/dist"
os
.
makedirs
(
dist_dir
,
exist_ok
=
True
)
# Determine correct wheel filename from METADATA
with
zipfile
.
ZipFile
(
wheel_path
,
"r"
)
as
z
:
metadata_file
=
next
(
(
n
for
n
in
z
.
namelist
()
if
n
.
endswith
(
".dist-info/METADATA"
)),
None
,
)
if
not
metadata_file
:
raise
RuntimeError
(
"Could not find METADATA in precompiled wheel."
)
metadata
=
z
.
read
(
metadata_file
).
decode
()
version_line
=
next
((
line
for
line
in
metadata
.
splitlines
()
if
line
.
startswith
(
"Version: "
)),
None
)
if
not
version_line
:
raise
RuntimeError
(
"Could not determine version from METADATA."
)
version
=
version_line
.
split
(
": "
)[
1
].
strip
()
# Build correct filename using internal version
arch_tag
=
"cp38-abi3-manylinux1_x86_64"
corrected_wheel_name
=
f
"vllm-
{
version
}
-
{
arch_tag
}
.whl"
final_wheel_path
=
os
.
path
.
join
(
dist_dir
,
corrected_wheel_name
)
print
(
f
"Docker build context detected, copying precompiled wheel "
f
"(
{
version
}
) to
{
final_wheel_path
}
"
)
shutil
.
copy2
(
wheel_path
,
final_wheel_path
)
return
# Unzip the wheel when not in Docker context
with
zipfile
.
ZipFile
(
wheel_path
)
as
wheel
:
with
zipfile
.
ZipFile
(
wheel_path
)
as
wheel
:
files_to_copy
=
[
files_to_copy
=
[
"vllm/_C.abi3.so"
,
"vllm/_C.abi3.so"
,
...
@@ -378,15 +412,9 @@ class repackage_wheel(build_ext):
...
@@ -378,15 +412,9 @@ class repackage_wheel(build_ext):
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so"
,
"vllm/cumem_allocator.abi3.so"
,
"vllm/cumem_allocator.abi3.so"
,
# "vllm/_version.py", # not available in nightly wheels yet
]
]
file_members
=
list
(
file_members
=
list
(
filter
(
lambda
x
:
x
.
filename
in
files_to_copy
,
wheel
.
filelist
))
filter
(
lambda
x
:
x
.
filename
in
files_to_copy
,
wheel
.
filelist
))
# vllm_flash_attn python code:
# Regex from
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
compiled_regex
=
re
.
compile
(
compiled_regex
=
re
.
compile
(
r
"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)
r
"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)
file_members
+=
list
(
file_members
+=
list
(
...
@@ -403,11 +431,8 @@ class repackage_wheel(build_ext):
...
@@ -403,11 +431,8 @@ class repackage_wheel(build_ext):
package_data
[
package_name
]
=
[]
package_data
[
package_name
]
=
[]
wheel
.
extract
(
file
)
wheel
.
extract
(
file
)
if
file_name
.
endswith
(
".py"
):
if
not
file_name
.
endswith
(
".py"
):
# python files shouldn't be added to package_data
package_data
[
package_name
].
append
(
file_name
)
continue
package_data
[
package_name
].
append
(
file_name
)
def
_no_device
()
->
bool
:
def
_no_device
()
->
bool
:
...
@@ -415,6 +440,9 @@ def _no_device() -> bool:
...
@@ -415,6 +440,9 @@ def _no_device() -> bool:
def
_is_cuda
()
->
bool
:
def
_is_cuda
()
->
bool
:
# Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
if
envs
.
VLLM_USE_PRECOMPILED
and
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
return
True
has_cuda
=
torch
.
version
.
cuda
is
not
None
has_cuda
=
torch
.
version
.
cuda
is
not
None
return
(
VLLM_TARGET_DEVICE
==
"cuda"
and
has_cuda
return
(
VLLM_TARGET_DEVICE
==
"cuda"
and
has_cuda
and
not
(
_is_neuron
()
or
_is_tpu
()))
and
not
(
_is_neuron
()
or
_is_tpu
()))
...
...
vllm/envs.py
View file @
a1873db2
...
@@ -68,6 +68,7 @@ if TYPE_CHECKING:
...
@@ -68,6 +68,7 @@ if TYPE_CHECKING:
MAX_JOBS
:
Optional
[
str
]
=
None
MAX_JOBS
:
Optional
[
str
]
=
None
NVCC_THREADS
:
Optional
[
str
]
=
None
NVCC_THREADS
:
Optional
[
str
]
=
None
VLLM_USE_PRECOMPILED
:
bool
=
False
VLLM_USE_PRECOMPILED
:
bool
=
False
VLLM_DOCKER_BUILD_CONTEXT
:
bool
=
False
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL
:
bool
=
False
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL
:
bool
=
False
VLLM_NO_DEPRECATION_WARNING
:
bool
=
False
VLLM_NO_DEPRECATION_WARNING
:
bool
=
False
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH
:
bool
=
False
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH
:
bool
=
False
...
@@ -222,8 +223,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -222,8 +223,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
# If set, vllm will use precompiled binaries (*.so)
# If set, vllm will use precompiled binaries (*.so)
"VLLM_USE_PRECOMPILED"
:
"VLLM_USE_PRECOMPILED"
:
lambda
:
bool
(
os
.
environ
.
get
(
"VLLM_USE_PRECOMPILED"
))
or
bool
(
lambda
:
os
.
environ
.
get
(
"VLLM_USE_PRECOMPILED"
,
""
).
strip
().
lower
()
in
os
.
environ
.
get
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
)),
(
"1"
,
"true"
)
or
bool
(
os
.
environ
.
get
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
)),
# Used to mark that setup.py is running in a Docker build context,
# in order to force the use of precompiled binaries.
"VLLM_DOCKER_BUILD_CONTEXT"
:
lambda
:
os
.
environ
.
get
(
"VLLM_DOCKER_BUILD_CONTEXT"
,
""
).
strip
().
lower
()
in
(
"1"
,
"true"
),
# Whether to force using nightly wheel in python build.
# Whether to force using nightly wheel in python build.
# This is used for testing the nightly wheel in python build.
# This is used for testing the nightly wheel in python build.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment