Unverified commit 3cebc864, authored by Anant Sharma, committed by GitHub
Browse files

feat: split monolithic requirements.txt and remove test deps from runtime image (#6656)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 14d928cb
......@@ -311,14 +311,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \
fi
{% endif %}
# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
# Install runtime dependencies (common + vllm-specific + planner + benchmarks).
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
--mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
--mount=type=bind,source=./container/deps/requirements.planner.txt,target=/tmp/requirements.planner.txt \
--mount=type=bind,source=./container/deps/requirements.benchmark.txt,target=/tmp/requirements.benchmark.txt \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \
--requirement /tmp/requirements.txt \
--requirement /tmp/requirements.test.txt
--requirement /tmp/requirements.common.txt \
--requirement /tmp/requirements.vllm.txt \
--requirement /tmp/requirements.planner.txt \
--requirement /tmp/requirements.benchmark.txt
# Copy tests, deploy and components for CI with correct ownership
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
......
......@@ -10,9 +10,15 @@
import argparse
import numpy as np
import tritonclient.grpc as triton_grpc
try:
import tritonclient.grpc as triton_grpc
from tritonclient.utils import InferenceServerException
except ImportError:
triton_grpc = None
InferenceServerException = None
from google.protobuf.json_format import MessageToDict
from tritonclient.utils import InferenceServerException
def main() -> None:
......
......@@ -7,8 +7,13 @@ from contextlib import asynccontextmanager
from typing import Any, AsyncIterator, Optional, Tuple
import pytest
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient.utils import InferenceServerException
try:
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient.utils import InferenceServerException
except ImportError:
mc = None
InferenceServerException = None
from dynamo.llm import KserveGrpcService, ModelRuntimeConfig, PythonAsyncEngine
......
......@@ -15,10 +15,6 @@ requires-python = ">=3.10"
dependencies = [
"ai-dynamo-runtime==1.0.0",
"transformers>=4.56.0",
"pytest>=8.3.4",
"types-aiofiles>=24.1.0",
"types-psutil>=7.0.0.20250218",
"types-requests>=2.32.4.20260107",
"kubernetes>=32.0.1,<33.0.0",
"fastapi>=0.115.0",
"distro",
......@@ -28,6 +24,7 @@ dependencies = [
"click<8.2.0",
"setuptools",
"prometheus_client>=0.23.1,<1.0",
"msgpack==1.1.2",
]
classifiers = [
......@@ -52,7 +49,6 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies]
trtllm =[
"uvloop",
"msgpack==1.1.2",
"tensorrt-llm==1.3.0rc5.post1",
]
......
......@@ -139,11 +139,19 @@ RUN --mount=type=cache,target=/root/.cache/uv \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
&& rm -rf /opt/dynamo/wheelhouse
# Install common and test dependencies
COPY container/deps/requirements.txt /tmp/requirements.txt
# Install runtime dependencies (common + vllm-specific + planner + benchmarks) and test dependencies
COPY container/deps/requirements.common.txt /tmp/requirements.common.txt
COPY container/deps/requirements.vllm.txt /tmp/requirements.vllm.txt
COPY container/deps/requirements.planner.txt /tmp/requirements.planner.txt
COPY container/deps/requirements.benchmark.txt /tmp/requirements.benchmark.txt
COPY container/deps/requirements.test.txt /tmp/requirements.test.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt && \
uv pip install \
--requirement /tmp/requirements.common.txt \
--requirement /tmp/requirements.vllm.txt \
--requirement /tmp/requirements.planner.txt \
--requirement /tmp/requirements.benchmark.txt \
--requirement /tmp/requirements.test.txt && \
rm /tmp/requirements*.txt
# Copy workspace files
......
......@@ -15,7 +15,6 @@ import time
import pytest
import requests
import tritonclient.grpc as grpcclient
from tests.utils.constants import QWEN
from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess
......@@ -80,6 +79,8 @@ def check_grpc_server_ready(
Raises:
Exception: If server is not ready after max_attempts
"""
import tritonclient.grpc as grpcclient
for attempt in range(max_attempts):
try:
client = grpcclient.InferenceServerClient(f"localhost:{port}")
......
......@@ -20,8 +20,16 @@ from functools import partial
import numpy as np
import pytest
import triton_echo_client
import tritonclient.grpc as grpcclient
try:
import tritonclient.grpc as grpcclient
except ImportError:
grpcclient = None
try:
import triton_echo_client
except ImportError:
triton_echo_client = None
from tests.utils.constants import QWEN
from tests.utils.managed_process import ManagedProcess
......
......@@ -16,7 +16,11 @@ import shutil
import numpy as np
import pytest
import tritonclient.grpc as grpcclient
try:
import tritonclient.grpc as grpcclient
except ImportError:
grpcclient = None
from tests.utils.managed_process import ManagedProcess
......
......@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
prefill/decode workers based on TTFT, ITL, and request patterns.
To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
To set up the environment, use the released Docker images for any of the backends, build your own Docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to set up the environment locally. If you use a local environment, make sure to install the dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.common.txt -r container/deps/requirements.planner.txt`
## Pre-Requisite: Pre-Deployment Profiling Data
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment