Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a2773f3e
Commit
a2773f3e
authored
Mar 15, 2025
by
ptarasiewiczNV
Committed by
GitHub
Mar 15, 2025
Browse files
chore: Apply patch to vLLM wheel (#177)
parent
29726360
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
103 deletions
+14
-103
container/Dockerfile.vllm
container/Dockerfile.vllm
+13
-3
container/deps/vllm/install.sh
container/deps/vllm/install.sh
+0
-99
container/deps/vllm/tests/test_patch_install.py
container/deps/vllm/tests/test_patch_install.py
+1
-1
No files found.
container/Dockerfile.vllm
View file @
a2773f3e
...
@@ -83,7 +83,7 @@ RUN cd /usr/local/src && \
...
@@ -83,7 +83,7 @@ RUN cd /usr/local/src && \
./DEBS/libibverbs* ./DEBS/ibverbs-providers* ./DEBS/librdmacm* ./DEBS/libibumad* && \
./DEBS/libibverbs* ./DEBS/ibverbs-providers* ./DEBS/librdmacm* ./DEBS/libibumad* && \
rm -rf /var/lib/apt/lists/* /usr/local/src/* mofed.tgz
rm -rf /var/lib/apt/lists/* /usr/local/src/* mofed.tgz
# Build and install Perf Analyzer for benchmarking
\
# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install rapidjson-dev zlib1g-dev
RUN apt-get update -y && apt-get -y install rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
...
@@ -194,7 +194,17 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
...
@@ -194,7 +194,17 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
ARG VLLM_REF="v0.7.2"
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynamo-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm
mkdir /tmp/vllm && \
uv pip install pip wheel && \
python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==${VLLM_REF} && \
cd /tmp/vllm && \
wheel unpack *.whl && \
cd vllm-*/ && \
patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
sed -i "s/__version__ = version = '\(.*\)'/__version__ = version = '\1.dynamo_patch'/g; s/__version_tuple__ = version_tuple = (\(.*\))/__version_tuple__ = version_tuple = (\1, 'dynamo_patch')/g" vllm/_version.py && \
mkdir -p /workspace/dist && \
wheel pack . --dest-dir /workspace/dist && \
uv pip install /workspace/dist/vllm-*.whl
# Install genai-perf for benchmarking
# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
...
@@ -303,7 +313,7 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
...
@@ -303,7 +313,7 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
COPY --from=dev /workspace/dist/*.whl whls/
COPY --from=dev /workspace/dist/*.whl whls/
RUN uv pip install $(find whls -name ai_dynamo_runtime-*.whl) && \
RUN uv pip install $(find whls -name ai_dynamo_runtime-*.whl) && \
uv pip install $(find whls -name ai_dynamo-*.whl) && \
uv pip install $(find whls -name ai_dynamo-*.whl) && \
uv pip install $(find whls -name vllm
*cp312
*.whl) && \
uv pip install $(find whls -name vllm
-
*.whl) && \
rm -r whls
rm -r whls
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
...
...
container/deps/vllm/install.sh
deleted
100755 → 0
View file @
29726360
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Clone vLLM at a given ref, optionally apply a patch, and install it with a
# configurable command (optionally using vLLM's precompiled-kernel wheel).

# Hardened vs. original: -u catches unset vars, pipefail propagates pipeline
# failures (original used only `set -e`).
set -euo pipefail

# Print usage information to stdout.
print_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo "  --patch PATH             Apply a patch file during installation"
  echo "  --ref REF                Specify the vLLM git reference (branch/tag/commit) to install"
  echo "  --install-cmd CMD        Specify the installation command (default: 'pip install')"
  echo "  --use-precompiled        Use precompiled kernels during installation"
  echo "  --installation-dir DIR   Specify the installation directory (default: 'vllm')"
  echo "  --help                   Show this help message"
}

# Default values
INSTALL_CMD="pip install"
VLLM_REF="main"
PATCH_PATH=""
USE_PRECOMPILED=false
# Bug fix: the original never defaulted INSTALLATION_DIR even though --help
# promises 'vllm'; invoking without --installation-dir cloned into "".
INSTALLATION_DIR="vllm"

# Parse arguments
while [[ $# -gt 0 ]]; do
  case $1 in
    --patch)
      PATCH_PATH="$2"
      shift 2
      ;;
    --ref)
      VLLM_REF="$2"
      shift 2
      ;;
    --install-cmd)
      INSTALL_CMD="$2"
      shift 2
      ;;
    --use-precompiled)
      USE_PRECOMPILED=true
      shift
      ;;
    --installation-dir)
      INSTALLATION_DIR="$2"
      shift 2
      ;;
    --help)
      print_usage
      exit 0
      ;;
    *)
      # Diagnostics go to stderr (original printed to stdout).
      echo "Unknown argument: $1" >&2
      print_usage >&2
      exit 1
      ;;
  esac
done

# Convert patch path to an absolute path if it is relative.
# Bug fix: only do this when a patch was actually supplied — the original ran
# this unconditionally, turning an empty PATCH_PATH into "$(pwd)/", which then
# defeated the `-n` guard below and made `git apply` run against a directory.
if [[ -n "$PATCH_PATH" && "$PATCH_PATH" != /* ]]; then
  PATCH_PATH="$(pwd)/${PATCH_PATH}"
fi

# Clone the vLLM repository at the requested ref.
echo "Cloning vLLM repository at ref: $VLLM_REF"
git clone https://github.com/vllm-project/vllm.git "$INSTALLATION_DIR"
cd "$INSTALLATION_DIR"
git checkout "$VLLM_REF"

# Apply the patch if one was provided.
if [[ -n "$PATCH_PATH" ]]; then
  echo "Applying patch from: $PATCH_PATH"
  git apply "$PATCH_PATH"
fi

# Install using the specified command.
echo "Installing using: $INSTALL_CMD"
if [[ "$USE_PRECOMPILED" == true ]]; then
  echo "Using precompiled kernels"
  # vLLM's build reads this env var to reuse a prebuilt wheel instead of
  # compiling kernels from source.
  export VLLM_PRECOMPILED_WHEEL_LOCATION="https://files.pythonhosted.org/packages/e7/c0/5b7f019aa798dedfb44c30971e9becf3c6a2db7dde311570178fa66c49c8/vllm-0.7.2-cp38-abi3-manylinux1_x86_64.whl"
fi
# Intentionally unquoted: INSTALL_CMD is a command line (e.g. "pip install
# --editable") that must word-split into program + arguments.
# shellcheck disable=SC2086
$INSTALL_CMD .

echo "Installation complete!"
container/deps/vllm/tests/test_patch_install.py
View file @
a2773f3e
...
@@ -27,4 +27,4 @@ pytestmark = pytest.mark.pre_merge
...
@@ -27,4 +27,4 @@ pytestmark = pytest.mark.pre_merge
@pytest.mark.skipif(vllm is None, reason="Skipping vllm tests, vllm not installed")
def test_version():
    # Verify that the image has the patched version of vllm.
    # The patching step appends '.dynamo_patch' to vllm's __version__,
    # so the installed package must carry that suffix.
    patched_version = vllm.__version__  # type: ignore
    assert patched_version.endswith("dynamo_patch")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment