Unverified Commit a2cb1c33 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

feat: update python packaging for new dynamo UX (#2054)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
Co-authored-by: default avatarAnant Sharma <anants@nvidia.com>
parent ff920537
...@@ -105,7 +105,7 @@ dynamo-build: ...@@ -105,7 +105,7 @@ dynamo-build:
FROM +rust-base FROM +rust-base
WORKDIR /workspace WORKDIR /workspace
COPY Cargo.toml Cargo.lock ./ COPY Cargo.toml Cargo.lock ./
COPY pyproject.toml README.md hatch_build.py ./ COPY pyproject.toml README.md ./
COPY components/ components/ COPY components/ components/
COPY lib/ lib/ COPY lib/ lib/
COPY launch/ launch/ COPY launch/ launch/
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
sglang[all]>=0.4.9.post2
uvloop
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
uvloop
vllm==0.9.2
...@@ -26,8 +26,8 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ...@@ -26,8 +26,8 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.4.9.post1" ARG SGLANG_VERSION="0.4.9.post1"
ARG SGL_KERNEL_VERSION="0.2.4"
################################## ##################################
########## Base Image ############ ########## Base Image ############
...@@ -116,7 +116,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ ...@@ -116,7 +116,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \ cd ${NIXL_SRC_DIR} && \
git checkout ${NIXL_REF} && \ git checkout ${NIXL_REF} && \
if [ "$ARCH" = "arm64" ]; then \ if [ "$ARCH" = "arm64" ]; then \
nixl_build_args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \ nixl_build_args="-Ddisable_gds_backend=true"; \
else \ else \
nixl_build_args=""; \ nixl_build_args=""; \
fi && \ fi && \
...@@ -155,7 +155,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" ...@@ -155,7 +155,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# TEMP: disable gds backend for arm64 # TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \ --config-settings=setup-args="-Ddisable_gds_backend=true"; \
else \ else \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
fi && \ fi && \
...@@ -269,8 +269,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman ...@@ -269,8 +269,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman
RUN mkdir -p /home/$USERNAME/.cache/ RUN mkdir -p /home/$USERNAME/.cache/
ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
################################## ##################################
...@@ -321,7 +319,6 @@ COPY LICENSE /workspace/ ...@@ -321,7 +319,6 @@ COPY LICENSE /workspace/
COPY Cargo.toml /workspace/ COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/ COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/ COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code # Copy source code
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
...@@ -364,18 +361,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME ...@@ -364,18 +361,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code # Copy rest of the code
COPY . /workspace COPY . /workspace
# Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \ mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \ cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp target/release/metrics /usr/local/bin
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \ RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl uv pip install /workspace/dist/ai_dynamo*any.whl
...@@ -385,9 +375,6 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -385,9 +375,6 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc echo "cat ~/.launch_screen" >> ~/.bashrc
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH
######################################## ########################################
...@@ -442,21 +429,13 @@ RUN apt-get update && \ ...@@ -442,21 +429,13 @@ RUN apt-get update && \
uv venv $VIRTUAL_ENV --python 3.12 && \ uv venv $VIRTUAL_ENV --python 3.12 && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Install SGLang and related packages (sgl-kernel, einops, sentencepiece) since they are not included in the runtime wheel
# https://github.com/sgl-project/sglang/blob/v0.4.9.post1/python/pyproject.toml#L18-51
ARG SGLANG_VERSION
ARG SGL_KERNEL_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install sglang[runtime_common]==${SGLANG_VERSION} einops sgl-kernel==${SGL_KERNEL_VERSION} sentencepiece
# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them # Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables # Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/ COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/ COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
RUN uv pip install ai-dynamo nixl --find-links wheelhouse RUN uv pip install ai-dynamo[sglang] --find-links wheelhouse
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# Copy launch banner # Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
......
...@@ -110,7 +110,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ ...@@ -110,7 +110,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \ cd ${NIXL_SRC_DIR} && \
git checkout ${NIXL_REF} && \ git checkout ${NIXL_REF} && \
if [ "$ARCH" = "arm64" ]; then \ if [ "$ARCH" = "arm64" ]; then \
nixl_build_args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \ nixl_build_args="-Ddisable_gds_backend=true"; \
else \ else \
nixl_build_args=""; \ nixl_build_args=""; \
fi && \ fi && \
...@@ -220,7 +220,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv ...@@ -220,7 +220,7 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
# TEMP: disable gds backend for arm64 # TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \ --config-settings=setup-args="-Ddisable_gds_backend=true"; \
else \ else \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
fi && \ fi && \
...@@ -273,7 +273,6 @@ COPY LICENSE /workspace/ ...@@ -273,7 +273,6 @@ COPY LICENSE /workspace/
COPY Cargo.toml /workspace/ COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/ COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/ COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code # Copy source code
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
...@@ -311,18 +310,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME ...@@ -311,18 +310,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code # Copy rest of the code
COPY . /workspace COPY . /workspace
# Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \ mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \ cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp target/release/metrics /usr/local/bin
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
# Install wheels # Install wheels
RUN . /opt/dynamo/venv/bin/activate && \ RUN . /opt/dynamo/venv/bin/activate && \
...@@ -484,8 +476,10 @@ ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm" ...@@ -484,8 +476,10 @@ ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
# Copy Dynamo wheels into wheelhouse # Copy Dynamo wheels into wheelhouse
# Copy metrics binary from dev image, not part of ai-dynamo wheel
COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/ COPY --from=dev /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/ COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
# NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url, # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
# uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
......
...@@ -18,6 +18,9 @@ ARG TORCH_BACKEND="cu128" ...@@ -18,6 +18,9 @@ ARG TORCH_BACKEND="cu128"
ARG DEEPGEMM_REF="03d0be3" ARG DEEPGEMM_REF="03d0be3"
ARG FLASHINF_REF="1d72ed4" ARG FLASHINF_REF="1d72ed4"
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_VERSION="0.9.2"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64) # ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
...@@ -39,10 +42,11 @@ ARG ARCH_ALT=x86_64 ...@@ -39,10 +42,11 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND, VLLM_VERSION so they're available in this stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG TORCH_BACKEND ARG TORCH_BACKEND
ARG VLLM_VERSION
USER root USER root
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
...@@ -134,7 +138,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ ...@@ -134,7 +138,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \ cd ${NIXL_SRC_DIR} && \
git checkout ${NIXL_REF} && \ git checkout ${NIXL_REF} && \
if [ "$ARCH" = "arm64" ]; then \ if [ "$ARCH" = "arm64" ]; then \
nixl_build_args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \ nixl_build_args="-Ddisable_gds_backend=true"; \
else \ else \
nixl_build_args=""; \ nixl_build_args=""; \
fi && \ fi && \
...@@ -171,8 +175,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" ...@@ -171,8 +175,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# TEMP: disable gds backend for arm64 # TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true" \ --config-settings=setup-args="-Ddisable_gds_backend=true"; \
--config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
else \ else \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
fi && \ fi && \
...@@ -190,13 +193,17 @@ ARG MAX_JOBS=16 ...@@ -190,13 +193,17 @@ ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda ENV CUDA_HOME=/usr/local/cuda
# TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
# Should be able to select how you want your build to go
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/uv \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ if [ "$ARCH" = "arm64" ]; then \
chmod +x /tmp/install_vllm.sh && \ # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND # Should be able to select how you want your build to go
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND; \
else \
uv pip install "vllm==${VLLM_VERSION}"; \
fi
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
...@@ -348,7 +355,6 @@ COPY LICENSE /workspace/ ...@@ -348,7 +355,6 @@ COPY LICENSE /workspace/
COPY Cargo.toml /workspace/ COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/ COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/ COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code # Copy source code
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
...@@ -392,22 +398,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME ...@@ -392,22 +398,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code # Copy rest of the code
COPY . /workspace COPY . /workspace
# Build C bindings, creates lib/bindings/c/include
#
# TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
# `lib/bindings/c/include` folder that build.rs generated across.
# I couldn't get that to work, hence TODO.
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \ mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \ cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp target/release/metrics /usr/local/bin
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \ RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl uv pip install /workspace/dist/ai_dynamo*any.whl
...@@ -455,9 +450,6 @@ RUN apt-get update && \ ...@@ -455,9 +450,6 @@ RUN apt-get update && \
cuda-toolkit-12-8 && \ cuda-toolkit-12-8 && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
### COPY BINDINGS ###
# Copy all bindings (wheels, lib, include) from ci_minimum
COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
### COPY NATS & ETCD ### ### COPY NATS & ETCD ###
# Copy nats and etcd from base image # Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
...@@ -466,11 +458,16 @@ ENV PATH=/usr/local/bin/etcd/:$PATH ...@@ -466,11 +458,16 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
# Copy UCX from base image as plugin for NIXL # Copy UCX from base image as plugin for NIXL
# Copy NIXL source from wheel_builder image # Copy NIXL source from wheel_builder image
# Copy dynamo wheels for gitlab artifacts
COPY --from=base /usr/local/ucx /usr/local/ucx COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries # Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
# COPY is a Dockerfile instruction, not a shell command, so it cannot be run
# conditionally from inside a RUN `if` block (the shell would report
# "COPY: not found"). Instead, bind-mount /opt from the base stage and copy the
# tree with cp. Only arm64 builds run install_vllm.sh with
# --installation-dir /opt; other architectures install vllm from a wheel, so
# presumably /opt/vllm only exists in the base stage on arm64.
# Redeclare ARCH so the condition sees it in this stage (harmless if already declared).
ARG ARCH
RUN --mount=type=bind,from=base,source=/opt,target=/mnt/base-opt \
    if [ "$ARCH" = "arm64" ]; then \
        cp -a /mnt/base-opt/vllm /opt/vllm; \
    fi
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\ $NIXL_LIB_DIR:\
...@@ -479,10 +476,11 @@ $NIXL_PLUGIN_DIR:\ ...@@ -479,10 +476,11 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH $LD_LIBRARY_PATH
# Copy entire venv # Copy entire venv
# There's a lot of stuff we'd have to re-compile # There's a lot of stuff we'd have to re-compile (for arm64)
# Think it's better to just copy # TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment
# Copy metrics binary from ci_minimum image, not part of ai-dynamo wheel
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV} COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Once UX refactor is merged # Once UX refactor is merged
......
...@@ -22,7 +22,6 @@ from typer.testing import CliRunner ...@@ -22,7 +22,6 @@ from typer.testing import CliRunner
from dynamo.sdk.cli.cli import cli from dynamo.sdk.cli.cli import cli
pytestmark = pytest.mark.pre_merge
runner = CliRunner() runner = CliRunner()
......
...@@ -23,7 +23,6 @@ from typer.testing import CliRunner ...@@ -23,7 +23,6 @@ from typer.testing import CliRunner
from dynamo.sdk.cli.cli import cli from dynamo.sdk.cli.cli import cli
pytestmark = pytest.mark.pre_merge
runner = CliRunner() runner = CliRunner()
......
...@@ -13,12 +13,9 @@ ...@@ -13,12 +13,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import pytest
from dynamo.sdk.core.protocol.interface import LinkedServices from dynamo.sdk.core.protocol.interface import LinkedServices
pytestmark = pytest.mark.pre_merge
def test_remove_backend2(): def test_remove_backend2():
from dynamo.sdk.tests.pipeline import Backend, Backend2, Frontend, Middle from dynamo.sdk.tests.pipeline import Backend, Backend2, Frontend, Middle
......
...@@ -19,8 +19,6 @@ from dynamo.sdk.cli.utils import configure_target_environment ...@@ -19,8 +19,6 @@ from dynamo.sdk.cli.utils import configure_target_environment
from dynamo.sdk.core.protocol.interface import ServiceInterface from dynamo.sdk.core.protocol.interface import ServiceInterface
from dynamo.sdk.core.runner import TargetEnum from dynamo.sdk.core.runner import TargetEnum
pytestmark = pytest.mark.pre_merge
@pytest.fixture(scope="module", autouse=True) @pytest.fixture(scope="module", autouse=True)
def setup_and_teardown(): def setup_and_teardown():
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
class CustomBuildHook(BuildHookInterface):
    """Hatch build hook that force-includes prebuilt artifacts in the wheel.

    The artifacts are read from the directory named by the ``DYNAMO_BIN_PATH``
    environment variable (default: ``target/release``) and are placed under
    ``dynamo/sdk/cli/bin/`` inside the wheel.
    """

    def initialize(self, version, build_data):
        """Register the artifacts to bundle when the wheel target is built.

        Args:
            version: Passed in by the build backend; unused here.
            build_data: Mutable build-data mapping; its ``force_include``
                entry is extended with the artifact paths.
        """
        # Only the wheel target ships the artifacts; other targets are left untouched.
        if self.target_name != "wheel":
            return

        bin_path = os.getenv("DYNAMO_BIN_PATH", "target/release")

        # Merge into any existing mapping instead of replacing it, so entries
        # already present in build_data["force_include"] are not discarded.
        force_include = build_data.setdefault("force_include", {})
        force_include.update(
            {
                f"{bin_path}/dynamo-run": "dynamo/sdk/cli/bin/dynamo-run",
                f"{bin_path}/metrics": "dynamo/sdk/cli/bin/metrics",
                f"{bin_path}/mock_worker": "dynamo/sdk/cli/bin/mock_worker",
                f"{bin_path}/libdynamo_llm_capi.so": "dynamo/sdk/cli/bin/libdynamo_llm_capi.so",
            }
        )
...@@ -59,33 +59,41 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"] ...@@ -59,33 +59,41 @@ keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"]
Repository = "https://github.com/ai-dynamo/dynamo.git" Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies] [project.optional-dependencies]
all = [ vllm = [
"ai-dynamo-vllm~=0.8.4", "uvloop",
"nixl", "nixl",
"vllm==0.9.2",
] ]
vllm = [ sglang = [
"ai-dynamo-vllm~=0.8.4" "uvloop",
"nixl",
"sglang[runtime_common]==0.4.9.post1",
"einops",
"sgl-kernel==0.2.4",
"sentencepiece",
] ]
[project.scripts] llama_cpp = [
dynamo = "dynamo.sdk.cli.cli:cli" "uvloop",
dynamo-run = "dynamo.sdk.cli.run_executable:dynamo_run" "llama-cpp-python",
metrics = "dynamo.sdk.cli.run_executable:metrics" ]
mock_worker = "dynamo.sdk.cli.run_executable:mock_worker"
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
build-backend = "hatchling.build" build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel] [tool.hatch.build.targets.wheel]
packages = ["deploy/sdk/src/dynamo", "components/planner/src/dynamo", "components/frontend/src/dynamo", "components/backends/llama_cpp/src/dynamo", "components/backends/mocker/src/dynamo", "components/backends/trtllm/src/dynamo", "components/backends/sglang/src/dynamo", "components/backends/vllm/src/dynamo"] packages = [
"deploy/sdk/src/dynamo",
# This section is for including the binaries in the wheel package "components/frontend/src/dynamo",
# but doesn't make them executable scripts in the venv bin directory "components/planner/src/dynamo",
"components/backends/llama_cpp/src/dynamo",
[tool.hatch.build.hooks.custom] "components/backends/mocker/src/dynamo",
path = "hatch_build.py" "components/backends/trtllm/src/dynamo",
"components/backends/sglang/src/dynamo",
"components/backends/vllm/src/dynamo"
]
[tool.hatch.metadata] [tool.hatch.metadata]
allow-direct-references = true allow-direct-references = true
...@@ -127,7 +135,6 @@ tmp_path_retention_policy = "failed" ...@@ -127,7 +135,6 @@ tmp_path_retention_policy = "failed"
# NOTE # NOTE
# We ignore model.py explicitly here to avoid mypy errors with duplicate modules # We ignore model.py explicitly here to avoid mypy errors with duplicate modules
# pytest overrides the default mypy exclude configuration and so we exclude here as well # pytest overrides the default mypy exclude configuration and so we exclude here as well
# Ignore mypy check for api-store component from Dynamo Deploy. Mypy analysis will fail since this package (and its dependencies) are not installed.
addopts = [ addopts = [
"-ra", "-ra",
"--showlocals", "--showlocals",
...@@ -149,6 +156,9 @@ filterwarnings = [ ...@@ -149,6 +156,9 @@ filterwarnings = [
"ignore:.*pkg_resources.*:UserWarning", "ignore:.*pkg_resources.*:UserWarning",
"ignore:.*multipart.*:PendingDeprecationWarning", "ignore:.*multipart.*:PendingDeprecationWarning",
"ignore:.*PyType_Spec.*custom tp_new.*:DeprecationWarning", # Ignore protobuf deprecation warning "ignore:.*PyType_Spec.*custom tp_new.*:DeprecationWarning", # Ignore protobuf deprecation warning
"ignore:.*unclosed.*socket.*:ResourceWarning", # Ignore unclosed socket warnings
"ignore:.*unclosed event loop.*:ResourceWarning", # Ignore unclosed event loop warnings
"ignore:.*Exception ignored in.*:pytest.PytestUnraisableExceptionWarning", # Ignore unraisable exception warnings
] ]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment