Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
58bb9021
Unverified
Commit
58bb9021
authored
Jul 31, 2025
by
Doug Smith
Committed by
GitHub
Jul 31, 2025
Browse files
fix(setup): improve precompiled wheel setup for Docker builds (#22025)
Signed-off-by:
dougbtv
<
dosmith@redhat.com
>
parent
7349d526
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
104 additions
and
124 deletions
+104
-124
docker/Dockerfile
docker/Dockerfile
+1
-0
requirements/test.txt
requirements/test.txt
+16
-8
setup.py
setup.py
+87
-116
No files found.
docker/Dockerfile
View file @
58bb9021
...
...
@@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
fi
# Install vllm wheel first, so that torch etc will be installed.
# !bang
RUN
--mount
=
type
=
bind
,from
=
build,src
=
/workspace/dist,target
=
/vllm-workspace/dist
\
--mount
=
type
=
cache,target
=
/root/.cache/uv
\
uv pip
install
--system
dist/
*
.whl
--verbose
\
...
...
requirements/test.txt
View file @
58bb9021
...
...
@@ -22,9 +22,7 @@ aiohttp==3.10.11
aiohttp-cors==0.8.1
# via ray
aiosignal==1.3.1
# via
# aiohttp
# ray
# via aiohttp
albucore==0.0.16
# via terratorch
albumentations==1.4.6
...
...
@@ -139,7 +137,7 @@ contourpy==1.3.0
# via matplotlib
cramjam==2.9.0
# via fastparquet
cupy-cuda12x==13.
3.0
cupy-cuda12x==13.
5.1
# via ray
cycler==0.12.1
# via matplotlib
...
...
@@ -226,7 +224,6 @@ frozenlist==1.5.0
# via
# aiohttp
# aiosignal
# ray
fsspec==2024.9.0
# via
# datasets
...
...
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
opentelemetry-api==1.35.0
# via
# mlflow-skinny
# opentelemetry-exporter-prometheus
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-prometheus==0.56b0
# via ray
opentelemetry-proto==1.36.0
# via ray
opentelemetry-sdk==1.35.0
# via mlflow-skinny
# via
# mlflow-skinny
# opentelemetry-exporter-prometheus
# ray
opentelemetry-semantic-conventions==0.56b0
# via opentelemetry-sdk
packaging==24.2
...
...
@@ -697,7 +702,9 @@ pqdm==0.2.0
pretrainedmodels==0.7.4
# via segmentation-models-pytorch
prometheus-client==0.22.0
# via ray
# via
# opentelemetry-exporter-prometheus
# ray
propcache==0.2.0
# via yarl
proto-plus==1.26.1
...
...
@@ -707,6 +714,7 @@ protobuf==5.28.3
# google-api-core
# googleapis-common-protos
# mlflow-skinny
# opentelemetry-proto
# proto-plus
# ray
# tensorboardx
...
...
@@ -854,7 +862,7 @@ rasterio==1.4.3
# rioxarray
# terratorch
# torchgeo
ray==2.4
3
.0
ray==2.4
8
.0
# via -r requirements/test.in
redis==5.2.0
# via tensorizer
...
...
setup.py
View file @
58bb9021
...
...
@@ -282,10 +282,69 @@ class cmake_build_ext(build_ext):
self
.
copy_file
(
file
,
dst_file
)
class
re
package_wheel
(
build_ext
)
:
class
p
re
compiled_wheel_utils
:
"""Extracts libraries and other files from an existing wheel."""
def
get_base_commit_in_main_branch
(
self
)
->
str
:
@
staticmethod
def
extract_precompiled_and_patch_package
(
wheel_url_or_path
:
str
)
->
dict
:
import
tempfile
import
zipfile
temp_dir
=
None
try
:
if
not
os
.
path
.
isfile
(
wheel_url_or_path
):
wheel_filename
=
wheel_url_or_path
.
split
(
"/"
)[
-
1
]
temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"vllm-wheels"
)
wheel_path
=
os
.
path
.
join
(
temp_dir
,
wheel_filename
)
print
(
f
"Downloading wheel from
{
wheel_url_or_path
}
"
f
"to
{
wheel_path
}
"
)
from
urllib.request
import
urlretrieve
urlretrieve
(
wheel_url_or_path
,
filename
=
wheel_path
)
else
:
wheel_path
=
wheel_url_or_path
print
(
f
"Using existing wheel at
{
wheel_path
}
"
)
package_data_patch
=
{}
with
zipfile
.
ZipFile
(
wheel_path
)
as
wheel
:
files_to_copy
=
[
"vllm/_C.abi3.so"
,
"vllm/_moe_C.abi3.so"
,
"vllm/_flashmla_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so"
,
"vllm/cumem_allocator.abi3.so"
,
]
compiled_regex
=
re
.
compile
(
r
"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)
file_members
=
list
(
filter
(
lambda
x
:
x
.
filename
in
files_to_copy
,
wheel
.
filelist
))
file_members
+=
list
(
filter
(
lambda
x
:
compiled_regex
.
match
(
x
.
filename
),
wheel
.
filelist
))
for
file
in
file_members
:
print
(
f
"[extract]
{
file
.
filename
}
"
)
target_path
=
os
.
path
.
join
(
"."
,
file
.
filename
)
os
.
makedirs
(
os
.
path
.
dirname
(
target_path
),
exist_ok
=
True
)
with
wheel
.
open
(
file
.
filename
)
as
src
,
open
(
target_path
,
"wb"
)
as
dst
:
shutil
.
copyfileobj
(
src
,
dst
)
pkg
=
os
.
path
.
dirname
(
file
.
filename
).
replace
(
"/"
,
"."
)
package_data_patch
.
setdefault
(
pkg
,
[]).
append
(
os
.
path
.
basename
(
file
.
filename
))
return
package_data_patch
finally
:
if
temp_dir
is
not
None
:
print
(
f
"Removing temporary directory
{
temp_dir
}
"
)
shutil
.
rmtree
(
temp_dir
)
@
staticmethod
def
get_base_commit_in_main_branch
()
->
str
:
# Force to use the nightly wheel. This is mainly used for CI testing.
if
envs
.
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL
:
return
"nightly"
...
...
@@ -334,115 +393,6 @@ class repackage_wheel(build_ext):
"wheel may not be compatible with your dev branch: %s"
,
err
)
return
"nightly"
def
run
(
self
)
->
None
:
assert
_is_cuda
(
),
"VLLM_USE_PRECOMPILED is only supported for CUDA builds"
wheel_location
=
os
.
getenv
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
,
None
)
if
wheel_location
is
None
:
base_commit
=
self
.
get_base_commit_in_main_branch
()
wheel_location
=
f
"https://wheels.vllm.ai/
{
base_commit
}
/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
# Fallback to nightly wheel if latest commit wheel is unavailable,
# in this rare case, the nightly release CI hasn't finished on main.
if
not
is_url_available
(
wheel_location
):
wheel_location
=
"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
import
zipfile
if
os
.
path
.
isfile
(
wheel_location
):
wheel_path
=
wheel_location
print
(
f
"Using existing wheel=
{
wheel_path
}
"
)
else
:
# Download the wheel from a given URL, assume
# the filename is the last part of the URL
wheel_filename
=
wheel_location
.
split
(
"/"
)[
-
1
]
import
tempfile
# create a temporary directory to store the wheel
temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"vllm-wheels"
)
wheel_path
=
os
.
path
.
join
(
temp_dir
,
wheel_filename
)
print
(
f
"Downloading wheel from
{
wheel_location
}
to
{
wheel_path
}
"
)
from
urllib.request
import
urlretrieve
try
:
urlretrieve
(
wheel_location
,
filename
=
wheel_path
)
except
Exception
as
e
:
from
setuptools.errors
import
SetupError
raise
SetupError
(
f
"Failed to get vLLM wheel from
{
wheel_location
}
"
)
from
e
# Set the dist_dir for Docker build context
dist_dir
=
(
"/workspace/dist"
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
else
"dist"
)
os
.
makedirs
(
dist_dir
,
exist_ok
=
True
)
# Extract only necessary compiled .so files from precompiled wheel
with
zipfile
.
ZipFile
(
wheel_path
)
as
wheel
:
# Get version from METADATA (optional, mostly useful for logging)
metadata_file
=
next
((
n
for
n
in
wheel
.
namelist
()
if
n
.
endswith
(
".dist-info/METADATA"
)),
None
)
if
not
metadata_file
:
raise
RuntimeError
(
"Could not find METADATA in precompiled wheel."
)
metadata
=
wheel
.
read
(
metadata_file
).
decode
()
version_line
=
next
((
line
for
line
in
metadata
.
splitlines
()
if
line
.
startswith
(
"Version: "
)),
None
)
if
not
version_line
:
raise
RuntimeError
(
"Could not determine version from METADATA."
)
version
=
version_line
.
split
(
": "
)[
1
].
strip
()
print
(
f
"Extracting precompiled kernels from vLLM wheel version: "
f
"
{
version
}
"
)
# List of compiled shared objects to extract
files_to_copy
=
[
"vllm/_C.abi3.so"
,
"vllm/_moe_C.abi3.so"
,
"vllm/_flashmla_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so"
,
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so"
,
"vllm/cumem_allocator.abi3.so"
,
]
file_members
=
list
(
filter
(
lambda
x
:
x
.
filename
in
files_to_copy
,
wheel
.
filelist
))
compiled_regex
=
re
.
compile
(
r
"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)
file_members
+=
list
(
filter
(
lambda
x
:
compiled_regex
.
match
(
x
.
filename
),
wheel
.
filelist
))
for
file
in
file_members
:
print
(
f
"Extracting and including
{
file
.
filename
}
"
"from existing wheel"
)
package_name
=
os
.
path
.
dirname
(
file
.
filename
).
replace
(
"/"
,
"."
)
file_name
=
os
.
path
.
basename
(
file
.
filename
)
if
package_name
not
in
package_data
:
package_data
[
package_name
]
=
[]
output_base
=
(
dist_dir
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
else
"."
)
target_path
=
os
.
path
.
join
(
output_base
,
file
.
filename
)
os
.
makedirs
(
os
.
path
.
dirname
(
target_path
),
exist_ok
=
True
)
with
wheel
.
open
(
file
.
filename
)
as
src
,
open
(
target_path
,
"wb"
)
as
dst
:
shutil
.
copyfileobj
(
src
,
dst
)
package_data
[
package_name
].
append
(
file_name
)
# Copy wheel into dist dir for Docker to consume (e.g., via --mount)
if
envs
.
VLLM_DOCKER_BUILD_CONTEXT
:
arch_tag
=
"cp38-abi3-manylinux1_x86_64"
corrected_wheel_name
=
f
"vllm-
{
version
}
-
{
arch_tag
}
.whl"
final_wheel_path
=
os
.
path
.
join
(
dist_dir
,
corrected_wheel_name
)
print
(
"Docker build context detected, copying precompiled wheel to "
f
"
{
final_wheel_path
}
"
)
shutil
.
copy2
(
wheel_path
,
final_wheel_path
)
def
_no_device
()
->
bool
:
return
VLLM_TARGET_DEVICE
==
"empty"
...
...
@@ -676,16 +626,37 @@ package_data = {
]
}
# If using precompiled, extract and patch package_data (in advance of setup)
if
envs
.
VLLM_USE_PRECOMPILED
:
assert
_is_cuda
(),
"VLLM_USE_PRECOMPILED is only supported for CUDA builds"
wheel_location
=
os
.
getenv
(
"VLLM_PRECOMPILED_WHEEL_LOCATION"
,
None
)
if
wheel_location
is
not
None
:
wheel_url
=
wheel_location
else
:
base_commit
=
precompiled_wheel_utils
.
get_base_commit_in_main_branch
()
wheel_url
=
f
"https://wheels.vllm.ai/
{
base_commit
}
/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
from
urllib.request
import
urlopen
try
:
with
urlopen
(
wheel_url
)
as
resp
:
if
resp
.
status
!=
200
:
wheel_url
=
"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
except
Exception
as
e
:
print
(
f
"[warn] Falling back to nightly wheel:
{
e
}
"
)
wheel_url
=
"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
patch
=
precompiled_wheel_utils
.
extract_precompiled_and_patch_package
(
wheel_url
)
for
pkg
,
files
in
patch
.
items
():
package_data
.
setdefault
(
pkg
,
[]).
extend
(
files
)
if
_no_device
():
ext_modules
=
[]
if
not
ext_modules
:
if
not
ext_modules
or
envs
.
VLLM_USE_PRECOMPILED
:
# Disable build_ext when using precompiled wheel
cmdclass
=
{}
else
:
cmdclass
=
{
"build_ext"
:
repackage_wheel
if
envs
.
VLLM_USE_PRECOMPILED
else
cmake_build_ext
}
cmdclass
=
{
"build_ext"
:
cmake_build_ext
}
setup
(
# static metadata should rather go in pyproject.toml
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment