feat: split monolithic requirements.txt and remove test deps from runtime image (#6656)

Signed-off-by: Anant Sharma <anants@nvidia.com>

feat: split monolithic requirements.txt and remove test deps from runtime image (#6656)
Signed-off-by: Anant Sharma <anants@nvidia.com>
3cebc864 · Anant Sharma · GitHub · 14d928cb · 3cebc864 · 3cebc864
Unverified Commit 3cebc864 authored Mar 10, 2026 by Anant Sharma Committed by GitHub Mar 10, 2026
9 changed files
--- a/container/templates/vllm_runtime.Dockerfile
+++ b/container/templates/vllm_runtime.Dockerfile
@@ -311,14 +311,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \
    fi
 {% endif %}

-# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+# Install runtime dependencies (common + vllm-specific + planner + benchmarks).
+# Test and dev dependencies are NOT installed here — they go in the test and dev images.
+RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
+    --mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
+    --mount=type=bind,source=./container/deps/requirements.planner.txt,target=/tmp/requirements.planner.txt \
+    --mount=type=bind,source=./container/deps/requirements.benchmark.txt,target=/tmp/requirements.benchmark.txt \
    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv pip install \
-        --requirement /tmp/requirements.txt \
-        --requirement /tmp/requirements.test.txt
+        --requirement /tmp/requirements.common.txt \
+        --requirement /tmp/requirements.vllm.txt \
+        --requirement /tmp/requirements.planner.txt \
+        --requirement /tmp/requirements.benchmark.txt

 # Copy tests, deploy and components for CI with correct ownership
 # Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>

--- a/lib/bindings/python/examples/kserve_grpc_service/test_client.py
+++ b/lib/bindings/python/examples/kserve_grpc_service/test_client.py
@@ -10,9 +10,15 @@
 import argparse

 import numpy as np
-import tritonclient.grpc as triton_grpc
+
+try:
+    import tritonclient.grpc as triton_grpc
+    from tritonclient.utils import InferenceServerException
+except ImportError:
+    triton_grpc = None
+    InferenceServerException = None
+
 from google.protobuf.json_format import MessageToDict
-from tritonclient.utils import InferenceServerException


 def main() -> None:

--- a/lib/bindings/python/tests/test_kserve_grpc.py
+++ b/lib/bindings/python/tests/test_kserve_grpc.py
@@ -7,8 +7,13 @@ from contextlib import asynccontextmanager
 from typing import Any, AsyncIterator, Optional, Tuple

 import pytest
-import tritonclient.grpc.model_config_pb2 as mc
-from tritonclient.utils import InferenceServerException
+
+try:
+    import tritonclient.grpc.model_config_pb2 as mc
+    from tritonclient.utils import InferenceServerException
+except ImportError:
+    mc = None
+    InferenceServerException = None

 from dynamo.llm import KserveGrpcService, ModelRuntimeConfig, PythonAsyncEngine


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,10 +15,6 @@ requires-python = ">=3.10"
 dependencies = [
    "ai-dynamo-runtime==1.0.0",
    "transformers>=4.56.0",
-    "pytest>=8.3.4",
-    "types-aiofiles>=24.1.0",
-    "types-psutil>=7.0.0.20250218",
-    "types-requests>=2.32.4.20260107",
    "kubernetes>=32.0.1,<33.0.0",
    "fastapi>=0.115.0",
    "distro",
@@ -28,6 +24,7 @@ dependencies = [
    "click<8.2.0",
    "setuptools",
    "prometheus_client>=0.23.1,<1.0",
+    "msgpack==1.1.2",
 ]

 classifiers = [
@@ -52,7 +49,6 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
 [project.optional-dependencies]
 trtllm =[
    "uvloop",
-    "msgpack==1.1.2",
    "tensorrt-llm==1.3.0rc5.post1",
 ]


--- a/tests/fault_tolerance/deploy/container/Dockerfile.local_vllm
+++ b/tests/fault_tolerance/deploy/container/Dockerfile.local_vllm
@@ -139,11 +139,19 @@ RUN --mount=type=cache,target=/root/.cache/uv \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
    && rm -rf /opt/dynamo/wheelhouse

-# Install common and test dependencies
-COPY container/deps/requirements.txt /tmp/requirements.txt
+# Install runtime dependencies (common + vllm-specific + planner + benchmarks) and test dependencies
+COPY container/deps/requirements.common.txt /tmp/requirements.common.txt
+COPY container/deps/requirements.vllm.txt /tmp/requirements.vllm.txt
+COPY container/deps/requirements.planner.txt /tmp/requirements.planner.txt
+COPY container/deps/requirements.benchmark.txt /tmp/requirements.benchmark.txt
 COPY container/deps/requirements.test.txt /tmp/requirements.test.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt && \
+    uv pip install \
+        --requirement /tmp/requirements.common.txt \
+        --requirement /tmp/requirements.vllm.txt \
+        --requirement /tmp/requirements.planner.txt \
+        --requirement /tmp/requirements.benchmark.txt \
+        --requirement /tmp/requirements.test.txt && \
    rm /tmp/requirements*.txt

 # Copy workspace files

--- a/tests/frontend/conftest.py
+++ b/tests/frontend/conftest.py
@@ -15,7 +15,6 @@ import time

 import pytest
 import requests
-import tritonclient.grpc as grpcclient

 from tests.utils.constants import QWEN
 from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess
@@ -80,6 +79,8 @@ def check_grpc_server_ready(
    Raises:
        Exception: If server is not ready after max_attempts
    """
+    import tritonclient.grpc as grpcclient
+
    for attempt in range(max_attempts):
        try:
            client = grpcclient.InferenceServerClient(f"localhost:{port}")

--- a/tests/frontend/grpc/test_tensor_mocker_engine.py
+++ b/tests/frontend/grpc/test_tensor_mocker_engine.py
@@ -20,8 +20,16 @@ from functools import partial

 import numpy as np
 import pytest
-import triton_echo_client
-import tritonclient.grpc as grpcclient
+
+try:
+    import tritonclient.grpc as grpcclient
+except ImportError:
+    grpcclient = None
+
+try:
+    import triton_echo_client
+except ImportError:
+    triton_echo_client = None

 from tests.utils.constants import QWEN
 from tests.utils.managed_process import ManagedProcess

--- a/tests/frontend/grpc/test_tensor_parameters.py
+++ b/tests/frontend/grpc/test_tensor_parameters.py
@@ -16,7 +16,11 @@ import shutil

 import numpy as np
 import pytest
-import tritonclient.grpc as grpcclient
+
+try:
+    import tritonclient.grpc as grpcclient
+except ImportError:
+    grpcclient = None

 from tests.utils.managed_process import ManagedProcess


--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
 The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
 prefill/decode workers based on TTFT, ITL, and request patterns.

-To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
+To set up the environment, simply use the released Docker images for any backend, or build your own Docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to set up the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.common.txt -r container/deps/requirements.planner.txt`.

 ## Pre-Requisite: Pre-Deployment Profiling Data