Unverified commit 3cebc864, authored by Anant Sharma, committed by GitHub
Browse files

feat: split monolithic requirements.txt and remove test deps from runtime image (#6656)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 14d928cb
...@@ -311,14 +311,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \ ...@@ -311,14 +311,19 @@ RUN if [ "${ENABLE_MODELEXPRESS_P2P}" = "true" ]; then \
fi fi
{% endif %} {% endif %}
# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache. # Install runtime dependencies (common + vllm-specific + planner + benchmarks).
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ # Test and dev dependencies are NOT installed here — they go in the test and dev images.
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ RUN --mount=type=bind,source=./container/deps/requirements.common.txt,target=/tmp/requirements.common.txt \
--mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.vllm.txt \
--mount=type=bind,source=./container/deps/requirements.planner.txt,target=/tmp/requirements.planner.txt \
--mount=type=bind,source=./container/deps/requirements.benchmark.txt,target=/tmp/requirements.benchmark.txt \
--mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \ --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \ export UV_CACHE_DIR=/home/dynamo/.cache/uv UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv pip install \ uv pip install \
--requirement /tmp/requirements.txt \ --requirement /tmp/requirements.common.txt \
--requirement /tmp/requirements.test.txt --requirement /tmp/requirements.vllm.txt \
--requirement /tmp/requirements.planner.txt \
--requirement /tmp/requirements.benchmark.txt
# Copy tests, deploy and components for CI with correct ownership # Copy tests, deploy and components for CI with correct ownership
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path> # Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
......
...@@ -10,9 +10,15 @@ ...@@ -10,9 +10,15 @@
import argparse import argparse
import numpy as np import numpy as np
import tritonclient.grpc as triton_grpc
try:
import tritonclient.grpc as triton_grpc
from tritonclient.utils import InferenceServerException
except ImportError:
triton_grpc = None
InferenceServerException = None
from google.protobuf.json_format import MessageToDict from google.protobuf.json_format import MessageToDict
from tritonclient.utils import InferenceServerException
def main() -> None: def main() -> None:
......
...@@ -7,8 +7,13 @@ from contextlib import asynccontextmanager ...@@ -7,8 +7,13 @@ from contextlib import asynccontextmanager
from typing import Any, AsyncIterator, Optional, Tuple from typing import Any, AsyncIterator, Optional, Tuple
import pytest import pytest
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient.utils import InferenceServerException try:
import tritonclient.grpc.model_config_pb2 as mc
from tritonclient.utils import InferenceServerException
except ImportError:
mc = None
InferenceServerException = None
from dynamo.llm import KserveGrpcService, ModelRuntimeConfig, PythonAsyncEngine from dynamo.llm import KserveGrpcService, ModelRuntimeConfig, PythonAsyncEngine
......
...@@ -15,10 +15,6 @@ requires-python = ">=3.10" ...@@ -15,10 +15,6 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"ai-dynamo-runtime==1.0.0", "ai-dynamo-runtime==1.0.0",
"transformers>=4.56.0", "transformers>=4.56.0",
"pytest>=8.3.4",
"types-aiofiles>=24.1.0",
"types-psutil>=7.0.0.20250218",
"types-requests>=2.32.4.20260107",
"kubernetes>=32.0.1,<33.0.0", "kubernetes>=32.0.1,<33.0.0",
"fastapi>=0.115.0", "fastapi>=0.115.0",
"distro", "distro",
...@@ -28,6 +24,7 @@ dependencies = [ ...@@ -28,6 +24,7 @@ dependencies = [
"click<8.2.0", "click<8.2.0",
"setuptools", "setuptools",
"prometheus_client>=0.23.1,<1.0", "prometheus_client>=0.23.1,<1.0",
"msgpack==1.1.2",
] ]
classifiers = [ classifiers = [
...@@ -52,7 +49,6 @@ Repository = "https://github.com/ai-dynamo/dynamo.git" ...@@ -52,7 +49,6 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
[project.optional-dependencies] [project.optional-dependencies]
trtllm =[ trtllm =[
"uvloop", "uvloop",
"msgpack==1.1.2",
"tensorrt-llm==1.3.0rc5.post1", "tensorrt-llm==1.3.0rc5.post1",
] ]
......
...@@ -139,11 +139,19 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -139,11 +139,19 @@ RUN --mount=type=cache,target=/root/.cache/uv \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \ /opt/dynamo/wheelhouse/nixl/nixl*.whl \
&& rm -rf /opt/dynamo/wheelhouse && rm -rf /opt/dynamo/wheelhouse
# Install common and test dependencies # Install runtime dependencies (common + vllm-specific + planner + benchmarks) and test dependencies
COPY container/deps/requirements.txt /tmp/requirements.txt COPY container/deps/requirements.common.txt /tmp/requirements.common.txt
COPY container/deps/requirements.vllm.txt /tmp/requirements.vllm.txt
COPY container/deps/requirements.planner.txt /tmp/requirements.planner.txt
COPY container/deps/requirements.benchmark.txt /tmp/requirements.benchmark.txt
COPY container/deps/requirements.test.txt /tmp/requirements.test.txt COPY container/deps/requirements.test.txt /tmp/requirements.test.txt
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt && \ uv pip install \
--requirement /tmp/requirements.common.txt \
--requirement /tmp/requirements.vllm.txt \
--requirement /tmp/requirements.planner.txt \
--requirement /tmp/requirements.benchmark.txt \
--requirement /tmp/requirements.test.txt && \
rm /tmp/requirements*.txt rm /tmp/requirements*.txt
# Copy workspace files # Copy workspace files
......
...@@ -15,7 +15,6 @@ import time ...@@ -15,7 +15,6 @@ import time
import pytest import pytest
import requests import requests
import tritonclient.grpc as grpcclient
from tests.utils.constants import QWEN from tests.utils.constants import QWEN
from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess
...@@ -80,6 +79,8 @@ def check_grpc_server_ready( ...@@ -80,6 +79,8 @@ def check_grpc_server_ready(
Raises: Raises:
Exception: If server is not ready after max_attempts Exception: If server is not ready after max_attempts
""" """
import tritonclient.grpc as grpcclient
for attempt in range(max_attempts): for attempt in range(max_attempts):
try: try:
client = grpcclient.InferenceServerClient(f"localhost:{port}") client = grpcclient.InferenceServerClient(f"localhost:{port}")
......
...@@ -20,8 +20,16 @@ from functools import partial ...@@ -20,8 +20,16 @@ from functools import partial
import numpy as np import numpy as np
import pytest import pytest
import triton_echo_client
import tritonclient.grpc as grpcclient try:
import tritonclient.grpc as grpcclient
except ImportError:
grpcclient = None
try:
import triton_echo_client
except ImportError:
triton_echo_client = None
from tests.utils.constants import QWEN from tests.utils.constants import QWEN
from tests.utils.managed_process import ManagedProcess from tests.utils.managed_process import ManagedProcess
......
...@@ -16,7 +16,11 @@ import shutil ...@@ -16,7 +16,11 @@ import shutil
import numpy as np import numpy as np
import pytest import pytest
import tritonclient.grpc as grpcclient
try:
import tritonclient.grpc as grpcclient
except ImportError:
grpcclient = None
from tests.utils.managed_process import ManagedProcess from tests.utils.managed_process import ManagedProcess
......
...@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann ...@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
prefill/decode workers based on TTFT, ITL, and request patterns. prefill/decode workers based on TTFT, ITL, and request patterns.
To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt` To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.common.txt -r container/deps/requirements.planner.txt`
## Pre-Requisite: Pre-Deployment Profiling Data ## Pre-Requisite: Pre-Deployment Profiling Data
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment