Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a1873db2
Unverified
Commit
a1873db2
authored
Jul 29, 2025
by
Doug Smith
Committed by
GitHub
Jul 29, 2025
Browse files
docker: docker-aware precompiled wheel support (#21127)
Signed-off-by:
dougbtv
<
dosmith@redhat.com
>
parent
a33ea28b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
68 additions
and
27 deletions
+68
-27
docker/Dockerfile
docker/Dockerfile
+16
-10
setup.py
setup.py
+43
-15
vllm/envs.py
vllm/envs.py
+9
-2
No files found.
docker/Dockerfile
View file @
a1873db2
...
...
@@ -209,16 +209,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
ARG
SCCACHE_S3_NO_CREDENTIALS=0
# Flag to control whether to use pre-built vLLM wheels
ARG
VLLM_USE_PRECOMPILED
# TODO: in setup.py, VLLM_USE_PRECOMPILED is sensitive to truthiness: it treats =0 as "true". This should be fixed.
ENV
VLLM_USE_PRECOMPILED=""
RUN if
[
"
${
VLLM_USE_PRECOMPILED
}
"
=
"1"
]
;
then
\
export
VLLM_USE_PRECOMPILED
=
1
&&
\
echo
"Using precompiled wheels"
;
\
else
\
unset
VLLM_USE_PRECOMPILED
&&
\
echo
"Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"
;
\
fi
ARG
VLLM_USE_PRECOMPILED=""
# if USE_SCCACHE is set, use sccache to speed up compilation
RUN
--mount
=
type
=
cache,target
=
/root/.cache/uv
\
...
...
@@ -235,6 +226,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
&&
export
SCCACHE_S3_NO_CREDENTIALS
=
${
SCCACHE_S3_NO_CREDENTIALS
}
\
&&
export
SCCACHE_IDLE_TIMEOUT
=
0
\
&&
export
CMAKE_BUILD_TYPE
=
Release
\
&&
export
VLLM_USE_PRECOMPILED
=
"
${
VLLM_USE_PRECOMPILED
}
"
\
&&
export
VLLM_DOCKER_BUILD_CONTEXT
=
1
\
&&
sccache
--show-stats
\
&&
python3 setup.py bdist_wheel
--dist-dir
=
dist
--py-limited-api
=
cp38
\
&&
sccache
--show-stats
;
\
...
...
@@ -248,9 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
# Clean any existing CMake artifacts
rm -rf .deps && \
mkdir -p .deps && \
export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
fi
# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
RUN if
[
"
$VLLM_USE_PRECOMPILED
"
=
"1"
]
;
then
\
echo
"Cleaning up extra wheels in dist/..."
&&
\
# Identify the most recent manylinux1_x86_64 wheel
KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
if [ -n "$KEEP_WHEEL" ]; then \
echo "Keeping wheel: $KEEP_WHEEL"; \
find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
fi; \
fi
# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY
.buildkite/check-wheel-size.py check-wheel-size.py
# sync the default value with .buildkite/check-wheel-size.py
...
...
setup.py
View file @
a1873db2
...
...
@@ -7,6 +7,7 @@ import json
import
logging
import
os
import
re
import
shutil
import
subprocess
import
sys
from
pathlib
import
Path
...
...
@@ -297,6 +298,10 @@ class repackage_wheel(build_ext):
]).
decode
(
"utf-8"
)
upstream_main_commit
=
json
.
loads
(
resp_json
)[
"sha"
]
# In Docker build context, .git may be immutable or missing.
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
return
upstream_main_commit
# Check if the upstream_main_commit exists in the local repo
try
:
subprocess
.
check_output
(
...
...
@@ -357,19 +362,48 @@ class repackage_wheel(build_ext):
# create a temporary directory to store the wheel
temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"vllm-wheels"
)
wheel_path
=
os
.
path
.
join
(
temp_dir
,
wheel_filename
)
print
(
f
"Downloading wheel from
{
wheel_location
}
to
{
wheel_path
}
"
)
from
urllib.request
import
urlretrieve
try
:
urlretrieve
(
wheel_location
,
filename
=
wheel_path
)
except
Exception
as
e
:
from
setuptools.errors
import
SetupError
raise
SetupError
(
f
"Failed to get vLLM wheel from
{
wheel_location
}
"
)
from
e
# During a docker build: determine correct filename, copy wheel.
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
dist_dir
=
"/workspace/dist"
os
.
makedirs
(
dist_dir
,
exist_ok
=
True
)
# Determine correct wheel filename from METADATA
with
zipfile
.
ZipFile
(
wheel_path
,
"r"
)
as
z
:
metadata_file
=
next
(
(
n
for
n
in
z
.
namelist
()
if
n
.
endswith
(
".dist-info/METADATA"
)),
None
,
)
if
not
metadata_file
:
raise
RuntimeError
(
"Could not find METADATA in precompiled wheel."
)
metadata
=
z
.
read
(
metadata_file
).
decode
()
version_line
=
next
((
line
for
line
in
metadata
.
splitlines
()
if
line
.
startswith
(
"Version: "
)),
None
)
if
not
version_line
:
raise
RuntimeError
(
"Could not determine version from METADATA."
)
version
=
version_line
.
split
(
": "
)[
1
].
strip
()
# Build correct filename using internal version
arch_tag
=
"cp38-abi3-manylinux1_x86_64"
corrected_wheel_name
=
f
"vllm-
{
version
}
-
{
arch_tag
}
.whl"
final_wheel_path
=
os
.
path
.
join
(
dist_dir
,
corrected_wheel_name
)
print
(
f
"Docker build context detected, copying precompiled wheel "
f
"(
{
version
}
) to
{
final_wheel_path
}
"
)
shutil
.
copy2
(
wheel_path
,
final_wheel_path
)
return
# Unzip the wheel when not in Docker context
with
zipfile
.
ZipFile
(
wheel_path
)
as
wheel
:
files_to_copy
=
[
"vllm/_C.abi3.so"
,
...
...
@@ -378,15 +412,9 @@ class repackage_wheel(build_ext):
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so"
,
"vllm/cumem_allocator.abi3.so"
,
# "vllm/_version.py", # not available in nightly wheels yet
]
file_members
=
list
(
filter
(
lambda
x
:
x
.
filename
in
files_to_copy
,
wheel
.
filelist
))
# vllm_flash_attn python code:
# Regex from
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
compiled_regex
=
re
.
compile
(
r
"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)
file_members
+=
list
(
...
...
@@ -403,11 +431,8 @@ class repackage_wheel(build_ext):
package_data
[
package_name
]
=
[]
wheel
.
extract
(
file
)
if
file_name
.
endswith
(
".py"
):
# python files shouldn't be added to package_data
continue
package_data
[
package_name
].
append
(
file_name
)
if
not
file_name
.
endswith
(
".py"
):
package_data
[
package_name
].
append
(
file_name
)
def
_no_device
()
->
bool
:
...
...
@@ -415,6 +440,9 @@ def _no_device() -> bool:
def _is_cuda() -> bool:
    """Report whether this build should be treated as a CUDA build.

    Returns True unconditionally when both ``envs.VLLM_USE_PRECOMPILED`` and
    ``envs.VLLM_DOCKER_BUILD_CONTEXT`` are truthy. Otherwise returns True only
    when ``VLLM_TARGET_DEVICE`` equals ``"cuda"``, ``torch.version.cuda`` is
    not ``None``, and neither ``_is_neuron()`` nor ``_is_tpu()`` is true.
    """
    # Docker/precompiled builds force CUDA, even when torch reports no CUDA.
    forced_cuda = envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT
    if forced_cuda:
        return True

    torch_cuda_version = torch.version.cuda
    if VLLM_TARGET_DEVICE != "cuda" or torch_cuda_version is None:
        return False
    # Neuron is checked before TPU, matching the original short-circuit order.
    if _is_neuron():
        return False
    return not _is_tpu()
...
...
vllm/envs.py
View file @
a1873db2
...
...
@@ -68,6 +68,7 @@ if TYPE_CHECKING:
MAX_JOBS
:
Optional
[
str
]
=
None
NVCC_THREADS
:
Optional
[
str
]
=
None
VLLM_USE_PRECOMPILED
:
bool
=
False
VLLM_DOCKER_BUILD_CONTEXT
:
bool
=
False
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL
:
bool
=
False
VLLM_NO_DEPRECATION_WARNING
:
bool
=
False
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH
:
bool
=
False
...
...
@@ -222,8 +223,14 @@ environment_variables: dict[str, Callable[[], Any]] = {
# If set, vllm will use precompiled binaries (*.so)
"VLLM_USE_PRECOMPILED"
:
lambda
:
bool
(
os
.
environ
.
get
(
"VLLM_USE_PRECOMPILED"
))
or
bool
(
os
.
environ
.
get
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
)),
lambda
:
os
.
environ
.
get
(
"VLLM_USE_PRECOMPILED"
,
""
).
strip
().
lower
()
in
(
"1"
,
"true"
)
or
bool
(
os
.
environ
.
get
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
)),
# Used to mark that setup.py is running in a Docker build context,
# in order to force the use of precompiled binaries.
"VLLM_DOCKER_BUILD_CONTEXT"
:
lambda
:
os
.
environ
.
get
(
"VLLM_DOCKER_BUILD_CONTEXT"
,
""
).
strip
().
lower
()
in
(
"1"
,
"true"
),
# Whether to force using nightly wheel in python build.
# This is used for testing the nightly wheel in python build.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment