Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
316e8844
Unverified
Commit
316e8844
authored
Sep 02, 2025
by
Anant Sharma
Committed by
GitHub
Sep 02, 2025
Browse files
build: enable kvbm in vllm container (#2763)
Signed-off-by:
Anant Sharma
<
anants@nvidia.com
>
parent
2422b83d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
14 deletions
+20
-14
container/Dockerfile.vllm
container/Dockerfile.vllm
+10
-10
container/build.sh
container/build.sh
+5
-0
tests/kvbm/test_determinism.py
tests/kvbm/test_determinism.py
+5
-4
No files found.
container/Dockerfile.vllm
View file @
316e8844
...
@@ -69,8 +69,13 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
...
@@ -69,8 +69,13 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Use dynamo base image (see /container/Dockerfile for more details)
# Use dynamo base image (see /container/Dockerfile for more details)
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
ARG PYTHON_VERSION
RUN apt-get update -y \
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
build-essential \
# vLLM build dependencies
# vLLM build dependencies
cmake \
cmake \
ibverbs-providers \
ibverbs-providers \
...
@@ -86,7 +91,6 @@ RUN apt-get update -y \
...
@@ -86,7 +91,6 @@ RUN apt-get update -y \
### VIRTUAL ENVIRONMENT SETUP ###
### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ARG PYTHON_VERSION
# Create virtual environment
# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
...
@@ -170,6 +174,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
...
@@ -170,6 +174,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ARG ARCH_ALT
ARG ARCH_ALT
ARG PYTHON_VERSION
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
...
@@ -178,7 +183,7 @@ ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
...
@@ -178,7 +183,7 @@ ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
RUN apt-get update && \
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
# Python runtime - CRITICAL for virtual environment to work
python
3
-dev \
python
${PYTHON_VERSION}
-dev \
build-essential \
build-essential \
# jq and curl for polling various endpoints and health checks
# jq and curl for polling various endpoints and health checks
jq \
jq \
...
@@ -242,14 +247,9 @@ $LD_LIBRARY_PATH
...
@@ -242,14 +247,9 @@ $LD_LIBRARY_PATH
### VIRTUAL ENVIRONMENT SETUP ###
### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Copy uv and entire virtual environment from framework container
ARG PYTHON_VERSION
COPY --from=framework /bin/uv /bin/uvx /bin/
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION
COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy virtual environment from framework image to avoid re-installing framework + vllm dependencies
COPY --from=framework \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages
# Install dynamo, NIXL, and dynamo-specific dependencies
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY benchmarks/ /opt/dynamo/benchmarks/
...
...
container/build.sh
View file @
316e8844
...
@@ -580,6 +580,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
...
@@ -580,6 +580,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
BUILD_ARGS+
=
" --build-arg RELEASE_BUILD=
${
RELEASE_BUILD
}
"
BUILD_ARGS+
=
" --build-arg RELEASE_BUILD=
${
RELEASE_BUILD
}
"
fi
fi
if
[[
$FRAMEWORK
==
"VLLM"
]]
;
then
echo
"Forcing enable_kvbm to true in vLLM image build"
ENABLE_KVBM
=
true
fi
if
[
!
-z
${
ENABLE_KVBM
}
]
;
then
if
[
!
-z
${
ENABLE_KVBM
}
]
;
then
echo
"Enabling the KVBM in the ai-dynamo-runtime"
echo
"Enabling the KVBM in the ai-dynamo-runtime"
BUILD_ARGS+
=
" --build-arg ENABLE_KVBM=
${
ENABLE_KVBM
}
"
BUILD_ARGS+
=
" --build-arg ENABLE_KVBM=
${
ENABLE_KVBM
}
"
...
...
tests/kvbm/test_determinism.py
View file @
316e8844
...
@@ -33,10 +33,10 @@ import requests
...
@@ -33,10 +33,10 @@ import requests
# Todo: enable the rest when kvbm is built in the ci
# Todo: enable the rest when kvbm is built in the ci
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
kvbm
,
pytest
.
mark
.
kvbm
,
#
pytest.mark.e2e,
pytest
.
mark
.
e2e
,
#
pytest.mark.slow,
pytest
.
mark
.
slow
,
#
pytest.mark.nightly,
pytest
.
mark
.
nightly
,
#
pytest.mark.gpu_1,
pytest
.
mark
.
gpu_1
,
]
]
...
@@ -774,6 +774,7 @@ class TestDeterminism:
...
@@ -774,6 +774,7 @@ class TestDeterminism:
],
],
indirect
=
True
,
indirect
=
True
,
)
)
@
pytest
.
mark
.
vllm
def
test_determinism_with_cache_reset
(
self
,
tester
,
llm_server
,
runtime_services
):
def
test_determinism_with_cache_reset
(
self
,
tester
,
llm_server
,
runtime_services
):
"""Test determinism across cache reset: run test with warmup, reset cache, run again without warmup."""
"""Test determinism across cache reset: run test with warmup, reset cache, run again without warmup."""
print
(
"
\n
"
+
"="
*
70
)
print
(
"
\n
"
+
"="
*
70
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment