Unverified commit 316e8844, authored by Anant Sharma, committed by GitHub
Browse files

build: enable kvbm in vllm container (#2763)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 2422b83d
...@@ -69,8 +69,13 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base ...@@ -69,8 +69,13 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# Use dynamo base image (see /container/Dockerfile for more details) # Use dynamo base image (see /container/Dockerfile for more details)
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
ARG PYTHON_VERSION
RUN apt-get update -y \ RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
build-essential \
# vLLM build dependencies # vLLM build dependencies
cmake \ cmake \
ibverbs-providers \ ibverbs-providers \
...@@ -86,7 +91,6 @@ RUN apt-get update -y \ ...@@ -86,7 +91,6 @@ RUN apt-get update -y \
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ARG PYTHON_VERSION
# Create virtual environment # Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \ RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION uv venv /opt/dynamo/venv --python $PYTHON_VERSION
...@@ -170,6 +174,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv ...@@ -170,6 +174,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ARG ARCH_ALT ARG ARCH_ALT
ARG PYTHON_VERSION
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
...@@ -178,7 +183,7 @@ ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins ...@@ -178,7 +183,7 @@ ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
RUN apt-get update && \ RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work # Python runtime - CRITICAL for virtual environment to work
python3-dev \ python${PYTHON_VERSION}-dev \
build-essential \ build-essential \
# jq and curl for polling various endpoints and health checks # jq and curl for polling various endpoints and health checks
jq \ jq \
...@@ -242,14 +247,9 @@ $LD_LIBRARY_PATH ...@@ -242,14 +247,9 @@ $LD_LIBRARY_PATH
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ # Copy uv and entire virtual environment from framework container
ARG PYTHON_VERSION COPY --from=framework /bin/uv /bin/uvx /bin/
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy virtual environment from framework image to avoid re-installing framework + vllm dependencies
COPY --from=framework \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages
# Install dynamo, NIXL, and dynamo-specific dependencies # Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/ COPY benchmarks/ /opt/dynamo/benchmarks/
......
...@@ -580,6 +580,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then ...@@ -580,6 +580,11 @@ if [ ! -z ${RELEASE_BUILD} ]; then
BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} " BUILD_ARGS+=" --build-arg RELEASE_BUILD=${RELEASE_BUILD} "
fi fi
if [[ $FRAMEWORK == "VLLM" ]]; then
echo "Forcing enable_kvbm to true in vLLM image build"
ENABLE_KVBM=true
fi
if [ ! -z ${ENABLE_KVBM} ]; then if [ ! -z ${ENABLE_KVBM} ]; then
echo "Enabling the KVBM in the ai-dynamo-runtime" echo "Enabling the KVBM in the ai-dynamo-runtime"
BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} " BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
......
...@@ -33,10 +33,10 @@ import requests ...@@ -33,10 +33,10 @@ import requests
# Todo: enable the rest when kvbm is built in the ci # Todo: enable the rest when kvbm is built in the ci
pytestmark = [ pytestmark = [
pytest.mark.kvbm, pytest.mark.kvbm,
# pytest.mark.e2e, pytest.mark.e2e,
# pytest.mark.slow, pytest.mark.slow,
# pytest.mark.nightly, pytest.mark.nightly,
# pytest.mark.gpu_1, pytest.mark.gpu_1,
] ]
...@@ -774,6 +774,7 @@ class TestDeterminism: ...@@ -774,6 +774,7 @@ class TestDeterminism:
], ],
indirect=True, indirect=True,
) )
@pytest.mark.vllm
def test_determinism_with_cache_reset(self, tester, llm_server, runtime_services): def test_determinism_with_cache_reset(self, tester, llm_server, runtime_services):
"""Test determinism across cache reset: run test with warmup, reset cache, run again without warmup.""" """Test determinism across cache reset: run test with warmup, reset cache, run again without warmup."""
print("\n" + "=" * 70) print("\n" + "=" * 70)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment