Unverified Commit 47477909 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat: deprecate sdk as dependency (#2149)

parent 095ea3e7
...@@ -52,10 +52,6 @@ export CARGO_TARGET_DIR=$HOME/dynamo/.build/target ...@@ -52,10 +52,6 @@ export CARGO_TARGET_DIR=$HOME/dynamo/.build/target
cargo build --locked --profile dev --features mistralrs cargo build --locked --profile dev --features mistralrs
cargo doc --no-deps cargo doc --no-deps
# create symlinks for the binaries in the deploy directory
mkdir -p $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin
ln -sf $HOME/dynamo/.build/target/debug/dynamo-run $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
# install the python bindings # install the python bindings
cd $HOME/dynamo/lib/bindings/python && retry maturin develop cd $HOME/dynamo/lib/bindings/python && retry maturin develop
......
...@@ -89,6 +89,4 @@ generated-values.yaml ...@@ -89,6 +89,4 @@ generated-values.yaml
TensorRT-LLM TensorRT-LLM
# Local build artifacts for devcontainer # Local build artifacts for devcontainer
.build/ .build/
# Copied binaries to ignore \ No newline at end of file
deploy/sdk/src/dynamo/sdk/cli/bin
...@@ -115,14 +115,6 @@ dynamo-build: ...@@ -115,14 +115,6 @@ dynamo-build:
RUN cargo build --release --locked --features llamacpp,cuda && \ RUN cargo build --release --locked --features llamacpp,cuda && \
cargo doc --no-deps cargo doc --no-deps
# Create symlinks for wheel building
RUN mkdir -p /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/ && \
# Remove existing symlinks
rm -f /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/* && \
# Create new symlinks pointing to the correct location
ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
RUN cd /workspace/lib/bindings/python && \ RUN cd /workspace/lib/bindings/python && \
uv build --wheel --out-dir /workspace/dist --python 3.12 uv build --wheel --out-dir /workspace/dist --python 3.12
RUN cd /workspace && \ RUN cd /workspace && \
......
...@@ -19,10 +19,12 @@ __all__ = [ ...@@ -19,10 +19,12 @@ __all__ = [
"KubernetesConnector", "KubernetesConnector",
"LoadPlannerDefaults", "LoadPlannerDefaults",
"SLAPlannerDefaults", "SLAPlannerDefaults",
"ServiceConfig",
] ]
# Import the classes # Import the classes
from dynamo.planner.circusd import CircusController from dynamo.planner.circusd import CircusController
from dynamo.planner.config import ServiceConfig
from dynamo.planner.defaults import LoadPlannerDefaults, SLAPlannerDefaults from dynamo.planner.defaults import LoadPlannerDefaults, SLAPlannerDefaults
from dynamo.planner.kubernetes_connector import KubernetesConnector from dynamo.planner.kubernetes_connector import KubernetesConnector
from dynamo.planner.planner_connector import PlannerConnector from dynamo.planner.planner_connector import PlannerConnector
...@@ -20,9 +20,9 @@ import tempfile ...@@ -20,9 +20,9 @@ import tempfile
import yaml import yaml
from dynamo.planner.config import ServiceConfig
from dynamo.planner.defaults import SLAPlannerDefaults from dynamo.planner.defaults import SLAPlannerDefaults
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
from dynamo.sdk.lib.config import ServiceConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -258,7 +258,7 @@ COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin ...@@ -258,7 +258,7 @@ COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
USER $USERNAME USER $USERNAME
ENV HOME=/home/$USERNAME ENV HOME=/home/$USERNAME
ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH
WORKDIR $HOME WORKDIR $HOME
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history # https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
...@@ -324,7 +324,6 @@ COPY rust-toolchain.toml /workspace/ ...@@ -324,7 +324,6 @@ COPY rust-toolchain.toml /workspace/
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
COPY components /workspace/components COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
RUN cargo build \ RUN cargo build \
--release \ --release \
...@@ -375,7 +374,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -375,7 +374,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc echo "cat ~/.launch_screen" >> ~/.bashrc
ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH
######################################## ########################################
########## Development Image ########### ########## Development Image ###########
......
...@@ -122,8 +122,6 @@ ARG CARGO_BUILD_JOBS ...@@ -122,8 +122,6 @@ ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
RUN cargo build --release RUN cargo build --release
RUN mkdir -p deploy/sdk/src/dynamo/sdk/cli/bin
RUN cp target/release/dynamo-run deploy/sdk/src/dynamo/sdk/cli/bin
RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../.. RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../..
RUN pip install --break-system-packages -e . RUN pip install --break-system-packages -e .
......
...@@ -278,7 +278,6 @@ COPY rust-toolchain.toml /workspace/ ...@@ -278,7 +278,6 @@ COPY rust-toolchain.toml /workspace/
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
COPY components /workspace/components COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
RUN cargo build \ RUN cargo build \
--release \ --release \
......
...@@ -287,7 +287,7 @@ RUN uv pip install maturin[patchelf] ...@@ -287,7 +287,7 @@ RUN uv pip install maturin[patchelf]
USER $USERNAME USER $USERNAME
ENV HOME=/home/$USERNAME ENV HOME=/home/$USERNAME
ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src:$PYTHONPATH:$HOME/dynamo/components/planner/src:$PYTHONPATH ENV PYTHONPATH=$PYTHONPATH:$HOME/dynamo/components/planner/src:$PYTHONPATH
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
WORKDIR $HOME WORKDIR $HOME
...@@ -354,7 +354,6 @@ COPY rust-toolchain.toml /workspace/ ...@@ -354,7 +354,6 @@ COPY rust-toolchain.toml /workspace/
COPY lib/ /workspace/lib/ COPY lib/ /workspace/lib/
COPY components /workspace/components COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
RUN cargo build \ RUN cargo build \
--release \ --release \
......
...@@ -121,14 +121,6 @@ go test ./... -v ...@@ -121,14 +121,6 @@ go test ./... -v
go test -race ./... go test -race ./...
``` ```
**Python Tests (SDK):**
```bash
cd deploy/sdk
pytest tests/ -v
pytest tests/ --cov=dynamo.sdk
```
### Integration Tests ### Integration Tests
**End-to-End Deployment Tests:** **End-to-End Deployment Tests:**
......
# Dynamo SDK
Dynamo is a Python-based SDK for building and deploying distributed inference applications. Dynamo leverages concepts from open source projects like [BentoML](https://github.com/bentoml/bentoml) to provide a developer-friendly experience for going from local development to K8s deployment.
## Installation
```bash
pip install ai-dynamo
```
## Quickstart
Let's build a simple distributed pipeline with 3 components: `Frontend`, `Middle` and `Backend`. The structure of the pipeline looks like this:
```
Users/Clients (HTTP)
┌─────────────┐
│ Frontend │ HTTP API endpoint (/generate)
└─────────────┘
┌─────────────┐
│ Middle │
└─────────────┘
┌─────────────┐
│ Backend │
└─────────────┘
```
The code for the pipeline looks like this:
```python
# filename: pipeline.py
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from dynamo.sdk import DYNAMO_IMAGE, depends, endpoint, service, api
class RequestType(BaseModel):
text: str
class ResponseType(BaseModel):
text: str
@service(
dynamo={"namespace": "inference"},
)
class Backend:
@endpoint()
async def generate(self, req: RequestType):
text = f"{req.text}-back"
for token in text.split():
yield f"Backend: {token}"
@service(
dynamo={"namespace": "inference"},
)
class Middle:
backend = depends(Backend)
@endpoint()
async def generate(self, req: RequestType):
text = f"{req.text}-mid"
next_request = RequestType(text=text).model_dump_json()
async for response in self.backend.generate(next_request):
yield f"Middle: {response}"
app = FastAPI(title="Hello World!")
@service(
dynamo={"namespace": "inference"},
app=app,
)
class Frontend:
middle = depends(Middle)
@api()
async def generate(self, request: RequestType):
async def content_generator():
async for response in self.middle.generate(request.model_dump_json()):
yield f"Frontend: {response}"
return StreamingResponse(content_generator())
```
You can run this pipeline locally by spinning up ETCD and NATS and then running the pipeline:
```bash
# Spin up ETCD and NATS
docker compose -f deploy/docker-compose.yml up -d
```
then
```bash
# Run the pipeline
dynamo serve pipeline:Frontend
```
Once it's up and running, you can make a request to the pipeline using
```bash
curl -X POST http://localhost:8080/generate \
-H "Content-Type: application/json" \
-d '{"text": "federer"}'
```
You should see the following output
```bash
federer-mid-back
```
You can find in-depth documentation for the Dynamo SDK [here](./docs/sdk/README.md) and for the Dynamo CLI [here](./docs/cli/README.md).
Please refer to [hello_world](../../../examples/hello_world/README.md) and [llm](../../../examples/llm/README.md) for examples.
../../../../docs/guides/cli_overview.md
\ No newline at end of file
../../../../docs/API/sdk.md
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from typing import Any
warnings.filterwarnings("ignore", category=UserWarning, message=".*pkg_resources.*")
# flake8: noqa: E402
from dynamo.sdk.core.decorators.endpoint import abstract_endpoint, api, endpoint
from dynamo.sdk.core.lib import DYNAMO_IMAGE, depends, liveness, readiness, service
from dynamo.sdk.core.protocol.interface import AbstractService
from dynamo.sdk.lib.decorators import async_on_start, on_shutdown
from dynamo.sdk.lib.utils import get_capi_library_path
dynamo_context: dict[str, Any] = {}
__all__ = [
"DYNAMO_IMAGE",
"on_shutdown",
"async_on_start",
"depends",
"dynamo_context",
"endpoint",
"api",
"service",
"AbstractService",
"abstract_endpoint",
"liveness",
"readiness",
"get_capi_library_path",
]
# Image template that layers a non-root application user and the application
# code on top of a configurable base image.
#
# Use ARG to allow base image to be specified at build time
# NOTE(review): the `__BASE_IMAGE__` default looks like a placeholder that is
# substituted by tooling before the build — confirm against the caller.
ARG BASE_IMAGE=__BASE_IMAGE__
FROM ${BASE_IMAGE}
# Build arguments for user configuration
# (uid/gid and names of the account created below; HOME_DIR derives from USERNAME)
ARG USER_ID=1024
ARG GROUP_ID=1024
ARG USERNAME=dynamo
ARG GROUPNAME=dynamo
ARG HOME_DIR=/home/${USERNAME}
# Set environment variables
# PYTHONUNBUFFERED: unbuffered stdout/stderr; PYTHONDONTWRITEBYTECODE: no .pyc files.
# PATH and PYTHONPATH are prefixed with the user's local bin and app directories.
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PATH="${HOME_DIR}/.local/bin:$PATH"
ENV PYTHONPATH="${HOME_DIR}/app:$PYTHONPATH"
# Create group and user
# The base image may not run as root, so the same sequence is executed either
# through sudo (non-zero uid) or directly (uid 0): create the group and user,
# create the app, .local/bin and .cache/pip directories, then hand ownership of
# the whole home directory to the new user.
RUN if [ "$(id -u)" != "0" ]; then \
echo "Using sudo for user/group creation"; \
sudo groupadd --gid ${GROUP_ID} ${GROUPNAME} \
&& sudo useradd --uid ${USER_ID} --gid ${GROUP_ID} --create-home --shell /bin/bash ${USERNAME} \
&& sudo mkdir -p ${HOME_DIR}/app \
&& sudo mkdir -p ${HOME_DIR}/.local/bin \
&& sudo mkdir -p ${HOME_DIR}/.cache/pip \
&& sudo chown -R ${USERNAME}:${GROUPNAME} ${HOME_DIR}; \
else \
echo "Running as root, no sudo needed"; \
groupadd --gid ${GROUP_ID} ${GROUPNAME} \
&& useradd --uid ${USER_ID} --gid ${GROUP_ID} --create-home --shell /bin/bash ${USERNAME} \
&& mkdir -p ${HOME_DIR}/app \
&& mkdir -p ${HOME_DIR}/.local/bin \
&& mkdir -p ${HOME_DIR}/.cache/pip \
&& chown -R ${USERNAME}:${GROUPNAME} ${HOME_DIR}; \
fi
# Switch to non-root user
USER ${USERNAME}
WORKDIR ${HOME_DIR}/app
# Copy application code
# The entire build context is copied into the working directory, owned by the new user.
COPY --chown=${USERNAME}:${GROUPNAME} . .
# Sets the execute bit on the app directory itself only (chmod is not recursive here).
RUN chmod +x ${HOME_DIR}/app
# SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
from __future__ import annotations
import logging
import os
from typing import Any
from dynamo.sdk.core.protocol.interface import ServiceInterface
# Import our own resource module
from dynamo.sdk.lib.resource import (
NVIDIA_GPU,
GPUManager,
ResourceError,
system_resources,
)
logger = logging.getLogger(__name__)
# Constants
DYN_DISABLE_AUTO_GPU_ALLOCATION = "DYN_DISABLE_AUTO_GPU_ALLOCATION"
DYN_DEPLOYMENT_ENV = "DYN_DEPLOYMENT_ENV"
logger = logging.getLogger(__name__)
def format_memory_gb(memory_bytes: float) -> str:
    """Render a byte count as a gigabyte string.

    The value is divided by 1024 three times, so one "GB" here is 1024**3 bytes.

    Args:
        memory_bytes: Memory size in bytes.

    Returns:
        The size in GB with exactly one decimal place and a ``GB`` suffix,
        e.g. ``"1.5GB"``.
    """
    in_kib = memory_bytes / 1024
    in_mib = in_kib / 1024
    in_gib = in_mib / 1024
    return "{:.1f}GB".format(in_gib)
class ResourceAllocator:
    """Assigns GPU indices to services and builds per-worker environments.

    GPUs are tracked in ``_available_gpus`` as ``(remaining, unit)`` tuples:
    ``remaining`` is the fraction of the device still free and ``unit`` is the
    fragment size that device is being carved into (``1.0`` for whole-GPU use).
    ``_service_gpu_allocations`` maps a service name to every GPU index handed
    to it so far.
    """

    def __init__(self) -> None:
        """Initialize the resource allocator."""
        self.system_resources = system_resources()
        self.gpu_manager = GPUManager()
        # Keep the detected GPU count separately: remaining_gpus is decremented
        # on every assignment, so it cannot be used to rebuild the pool later.
        self._total_gpus = len(self.system_resources[NVIDIA_GPU])
        self.remaining_gpus = self._total_gpus
        # For compatibility with the old implementation
        self._available_gpus: list[tuple[float, float]] = [
            (1.0, 1.0)  # each item is (remaining, unit)
            for _ in range(self.remaining_gpus)
        ]
        self._service_gpu_allocations: dict[str, list[int]] = {}
        logger.debug(
            f"ResourceAllocator initialized with {self.remaining_gpus} GPUs available"
        )

    def assign_gpus(self, count: float, service_name: str = "") -> list[int]:
        """
        Assign GPUs for use.

        Args:
            count: Number of GPUs to assign (can be fractional, but only below 1)
            service_name: Optional service to record the assignment under

        Returns:
            List of GPU indices that were assigned

        Raises:
            ResourceError: If ``count`` is a non-integer value of 1 or more
        """
        if count > self.remaining_gpus:
            logger.warning(
                f"Requested {count} GPUs, but only {self.remaining_gpus} are remaining. "
                f"Serving may fail due to inadequate GPUs. Set {DYN_DISABLE_AUTO_GPU_ALLOCATION}=1 "
                "to disable automatic allocation and allocate GPUs manually."
            )
        self.remaining_gpus = int(max(0, self.remaining_gpus - count))

        assigned = []  # Will store assigned GPU indices
        if count < 1:  # a fractional GPU
            try:
                # try to find the GPU used with the same fragment
                gpu = next(
                    i
                    for i, v in enumerate(self._available_gpus)
                    if v[0] > 0 and v[1] == count
                )
            except StopIteration:
                try:
                    # otherwise take the first completely unused GPU
                    gpu = next(
                        i for i, v in enumerate(self._available_gpus) if v[0] == 1.0
                    )
                except StopIteration:
                    # nothing free: over-subscribe by inventing a new index
                    gpu = len(self._available_gpus)
                    self._available_gpus.append((1.0, count))
            remaining, _ = self._available_gpus[gpu]
            if (remaining := remaining - count) < count:
                # can't assign to the next one, mark it as zero.
                self._available_gpus[gpu] = (0.0, count)
            else:
                self._available_gpus[gpu] = (remaining, count)
            assigned = [gpu]
        else:  # allocate n GPUs, n is a positive integer
            if int(count) != count:
                raise ResourceError("Float GPUs larger than 1 is not supported")
            count = int(count)
            unassigned = [
                gpu
                for gpu, value in enumerate(self._available_gpus)
                if value[0] > 0 and value[1] == 1.0
            ]
            if len(unassigned) < count:
                logger.warning(f"Not enough GPUs to be assigned, {count} is requested")
                # over-subscribe: append placeholder devices for the shortfall
                for _ in range(count - len(unassigned)):
                    unassigned.append(len(self._available_gpus))
                    self._available_gpus.append((1.0, 1.0))
            for gpu in unassigned[:count]:
                self._available_gpus[gpu] = (0.0, 1.0)
            assigned = unassigned[:count]

        # Store the allocation if service_name is provided
        if service_name and assigned:
            if service_name in self._service_gpu_allocations:
                self._service_gpu_allocations[service_name].extend(assigned)
                logger.debug(
                    f"Additional GPUs {assigned} allocated to service '{service_name}', "
                    f"total GPUs: {self._service_gpu_allocations[service_name]}"
                )
            else:
                # Store a copy so later extend() calls on the tracking list do
                # not mutate the list already returned to the caller.
                self._service_gpu_allocations[service_name] = list(assigned)
                logger.debug(f"GPUs {assigned} allocated to service '{service_name}'")
        elif assigned:
            logger.debug(f"GPUs {assigned} allocated without service name tracking")

        return assigned

    def get_gpu_stats(self) -> list[dict[str, Any]]:
        """Get detailed statistics for all GPUs."""
        return self.gpu_manager.get_gpu_stats()

    def get_resource_envs(
        self,
        service: ServiceInterface[Any],
    ) -> tuple[int, list[dict[str, str]]]:
        """
        Get resource environment variables for a service.

        Args:
            service: The service to get resource environment variables for

        Returns:
            Tuple of (number of workers, list of environment variables dictionaries).
            The list has one entry per worker when GPUs are auto-allocated and
            is empty otherwise.
        """
        logger.info(f"Getting resource envs for service {service.name}")
        services = service.get_service_configs()
        if service.name not in services:
            logger.warning(f"No service configs found for {service.name}")
            return 1, []  # Default to 1 worker, no special resources

        config = services[service.name]
        logger.debug(f"Using config for {service.name}: {config}")

        num_gpus = 0
        num_workers = 1
        resource_envs: list[dict[str, str]] = []

        # Check if service requires GPUs
        if "gpu" in (config.get("resources") or {}):
            # NOTE(review): int() truncates, so a fractional "gpu" value below 1
            # becomes 0 here and disables allocation — confirm this is intended.
            num_gpus = int(config["resources"]["gpu"])  # type: ignore
            logger.info(f"GPU requirement found: {num_gpus}")
            # Check if we have enough GPUs
            available_gpus = self.gpu_manager.get_available_gpus()
            if num_gpus > len(available_gpus):
                logger.warning(
                    f"Requested {num_gpus} GPUs, but only {len(available_gpus)} are available. "
                    f"Service may fail due to inadequate GPU resources."
                )

        # Determine number of workers
        if config.get("workers"):
            num_workers = config["workers"]
            logger.info(f"Using configured worker count: {num_workers}")

        # Handle GPU allocation
        if num_gpus and DYN_DISABLE_AUTO_GPU_ALLOCATION not in os.environ:
            logger.info("GPU allocation enabled")
            if os.environ.get(DYN_DEPLOYMENT_ENV):
                logger.info("K8s deployment detected")
                # K8s replicas: Assumes DYN_DEPLOYMENT_ENV is set
                # each pod in replicaset will have separate GPU with same CUDA_VISIBLE_DEVICES
                assigned = self.assign_gpus(num_gpus, service.name)
                logger.info(f"Assigned GPUs for K8s: {assigned}")
                # Generate environment variables for each worker
                for _ in range(num_workers):
                    env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
                    resource_envs.append(env_vars)
            else:
                logger.info(
                    f"Local deployment detected. Allocating GPUs for {num_workers} workers of '{service.name}'"
                )
                # Local deployment where we split all available GPUs across workers
                for worker_id in range(num_workers):
                    assigned = self.assign_gpus(num_gpus, service.name)
                    logger.debug(
                        f"Worker {worker_id} of '{service.name}' assigned GPUs: {assigned}"
                    )
                    # Generate environment variables for this worker
                    env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
                    # If we have comprehensive GPU stats, log them
                    try:
                        gpu_stats = [
                            stat
                            for stat in self.get_gpu_stats()
                            if stat["index"] in assigned
                        ]
                        for stat in gpu_stats:
                            logger.info(
                                f"GPU {stat['index']} ({stat['name']}): "
                                f"Memory: {format_memory_gb(stat['free_memory'])} free / "
                                f"{format_memory_gb(stat['total_memory'])} total, "
                                f"Utilization: {stat['gpu_utilization']}% "
                            )
                    except Exception as e:
                        # Stats are informational only; never fail allocation on them.
                        logger.debug(f"Failed to get GPU stats: {e}")
                    resource_envs.append(env_vars)

        logger.info(
            f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}"
        )
        return num_workers, resource_envs

    def reset_allocations(self) -> None:
        """Reset all GPU allocations.

        Restores the pool to the GPU count detected at construction time.
        Rebuilding it from ``remaining_gpus`` would be wrong because that
        counter has already been reduced by earlier assignments.
        """
        self.gpu_manager.reset_allocations()
        # Reset legacy tracking
        self.remaining_gpus = self._total_gpus
        self._available_gpus = [(1.0, 1.0) for _ in range(self._total_gpus)]
        self._service_gpu_allocations = {}
# SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
# Once planner v1 goes live - this will be full of more granular APIs
from __future__ import annotations
import contextlib
import os
import pathlib
import shlex
import sys
from dataclasses import dataclass
from typing import Any, Callable
import psutil
from circus.arbiter import Arbiter as _Arbiter
from circus.sockets import CircusSocket
from circus.watcher import Watcher
from .utils import ServiceProtocol, reserve_free_port
class Arbiter(_Arbiter):
    """Arbiter with cleanup support via exit_stack.

    Callers may register cleanup callbacks or context managers on
    ``exit_stack``; they are unwound when the arbiter stops, or when
    ``start`` fails.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Forward all arguments to the base arbiter and create the exit stack."""
        super().__init__(*args, **kwargs)
        self.exit_stack = contextlib.ExitStack()

    def start(self, cb: Callable[[Any], Any] | None = None) -> None:
        """Start arbiter and enter context.

        Args:
            cb: Optional callback forwarded to the base ``start``.

        Raises:
            Whatever exception the base ``start`` raises or stores on the
            future it returns.
        """
        self.exit_stack.__enter__()
        try:
            fut = super().start(cb)
            if exc := fut.exception():
                raise exc
        except BaseException:
            # Startup failed: unwind registered resources now instead of
            # relying on a later stop() call that may never happen. Closing
            # is safe to repeat, so a subsequent stop() stays harmless.
            self.exit_stack.close()
            raise

    def stop(self) -> None:
        """Stop arbiter and cleanup resources."""
        self.exit_stack.__exit__(None, None, None)
        return super().stop()
@dataclass
class CircusRunner:
    """Context-manager handle that ties an arbiter's lifetime to a ``with`` block."""

    # The arbiter whose lifecycle this runner controls.
    arbiter: Arbiter

    def __enter__(self) -> CircusRunner:
        # Nothing to set up; the runner itself is the managed object.
        return self

    def __exit__(self, *exc_details: Any) -> None:
        # Always stop on exit; returning None lets any exception propagate.
        self.stop()

    @property
    def running(self) -> bool:
        """Whether the wrapped arbiter reports itself as running."""
        return self.arbiter.running

    def stop(self) -> None:
        """Stop the wrapped arbiter."""
        self.arbiter.stop()
MAX_AF_UNIX_PATH_LENGTH = 103
def create_circus_watcher(
    name: str,
    args: list[str],
    *,
    cmd: str = sys.executable,
    use_sockets: bool = True,
    **kwargs: Any,
) -> Watcher:
    """Build a circus ``Watcher`` for one process.

    Args:
        name: Watcher name; also used as the per-watcher log sub-directory.
        args: Arguments passed to ``cmd``.
        cmd: Executable to run; defaults to the current Python interpreter.
        use_sockets: Forwarded to the watcher.
        **kwargs: Extra keyword arguments forwarded to ``Watcher``.

    Returns:
        The configured watcher.

    Environment:
        DYN_CIRCUS_LOG_DIR: When set to a non-empty value, stdout/stderr are
            written to ``<dir>/<name>/output.log`` and ``error.log``.
        DYN_CIRCUS_RESPAWN: ``true``/``1``/``yes`` (case-insensitive) enables
            respawning; anything else disables it.
    """
    stdout_stream = None
    stderr_stream = None
    log_dir = os.environ.get("DYN_CIRCUS_LOG_DIR")
    # An empty value is treated as unset; otherwise the prefix would become
    # "/<name>" and log directories would be created at the filesystem root.
    if log_dir:
        prefix = f"{log_dir}/{name}"
        os.makedirs(prefix, exist_ok=True)
        stdout_stream = {
            "class": "FileStream",
            "filename": f"{prefix}/output.log",
            "backup_count": 10,
        }
        stderr_stream = {
            "class": "FileStream",
            "filename": f"{prefix}/error.log",
            "backup_count": 10,
        }

    respawn = os.environ.get("DYN_CIRCUS_RESPAWN", "false").lower() in (
        "true",
        "1",
        "yes",
    )
    return Watcher(
        name=name,
        # The command is shell-quoted on POSIX only.
        cmd=shlex.quote(cmd) if psutil.POSIX else cmd,
        args=args,
        copy_env=True,
        stop_children=True,
        use_sockets=use_sockets,
        graceful_timeout=86400,
        respawn=respawn,
        stdout_stream=stdout_stream,
        stderr_stream=stderr_stream,
        **kwargs,
    )
def get_env_or_reserved_port(env_var):
    """Return the port named by ``env_var``, or a freshly reserved free port.

    A non-empty environment value is converted with ``int``; otherwise the
    port yielded by ``reserve_free_port()`` is returned.
    """
    configured = os.environ.get(env_var)
    if not configured:
        with reserve_free_port() as port:  # type: ignore
            return port
    return int(configured)
def create_arbiter(
    watchers: list[Watcher], *, threaded: bool = False, **kwargs: Any
) -> Arbiter:
    """Create an ``Arbiter`` listening on loopback TCP endpoints.

    Ports come from ``DYN_CIRCUS_ENDPOINT_PORT`` / ``DYN_CIRCUS_PUBSUB_PORT``
    when set, otherwise from reserved free ports. ``check_delay`` defaults to
    10 unless supplied in ``kwargs``. ``threaded`` is accepted but not used
    by this function.
    """
    ports = {
        "endpoint": get_env_or_reserved_port("DYN_CIRCUS_ENDPOINT_PORT"),
        "pubsub": get_env_or_reserved_port("DYN_CIRCUS_PUBSUB_PORT"),
    }
    # Pull check_delay out first so it is not passed twice via **kwargs.
    check_delay = kwargs.pop("check_delay", 10)
    return Arbiter(
        watchers,
        endpoint=f"tcp://127.0.0.1:{ports['endpoint']}",
        pubsub_endpoint=f"tcp://127.0.0.1:{ports['pubsub']}",
        check_delay=check_delay,
        **kwargs,
    )
def path_to_uri(path: str) -> str:
    """
    Convert a path to a URI.

    Relative paths are resolved against the current working directory first,
    because ``as_uri()`` rejects non-absolute paths with ``ValueError``.
    Absolute paths are converted unchanged.

    Args:
        path: Path to convert to URI.

    Returns:
        URI string. (quoted, absolute)
    """
    pure_path = pathlib.PurePosixPath(path)
    if not pure_path.is_absolute():
        pure_path = pathlib.PurePosixPath(os.getcwd()) / pure_path
    return pure_path.as_uri()
def _get_server_socket(
    service: ServiceProtocol,
    uds_path: str,
) -> tuple[str, CircusSocket]:
    """Create a Unix Domain Socket for a service.

    The socket file is named after ``id(service)`` and placed in ``uds_path``.

    Args:
        service: The service to create a socket for
        uds_path: Base directory for Unix Domain Sockets

    Returns:
        Tuple of (socket URI, CircusSocket object)

    Raises:
        AssertionError: If socket path exceeds maximum length
    """
    socket_path = os.path.join(uds_path, f"{id(service)}.sock")
    # Measure the encoded byte length (the limit applies to bytes, not
    # characters) and raise explicitly so the check survives ``python -O``,
    # which strips ``assert`` statements.
    if len(os.fsencode(socket_path)) >= MAX_AF_UNIX_PATH_LENGTH:
        raise AssertionError(
            f"Socket path '{socket_path}' exceeds maximum length of {MAX_AF_UNIX_PATH_LENGTH}"
        )
    return path_to_uri(socket_path), CircusSocket(name=service.name, path=socket_path)
# SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
from __future__ import annotations
import importlib.metadata
import typer
from rich.console import Console
from dynamo.sdk.cli.env import env
from dynamo.sdk.cli.run import run
from dynamo.sdk.cli.serve import serve
console = Console()
cli = typer.Typer(
context_settings={"help_option_names": ["-h", "--help"]},
name="dynamo",
no_args_is_help=True,
pretty_exceptions_enable=False,
)
def version_callback(value: bool):
    """Print the installed ``ai-dynamo`` version and exit when ``--version`` is given.

    Args:
        value: True when the flag was passed; nothing happens otherwise.

    Raises:
        typer.Exit: Always, after printing, when ``value`` is true.
    """
    if not value:
        return
    try:
        version = importlib.metadata.version("ai-dynamo")
    except importlib.metadata.PackageNotFoundError:
        # No distribution metadata (e.g. running from a source tree on
        # PYTHONPATH): report a placeholder instead of crashing.
        version = "unknown"
    console.print(
        f"[bold green]Dynamo CLI[/bold green] version: [cyan]{version}[/cyan]"
    )
    raise typer.Exit()
# Root callback of the `cli` Typer app. It only declares the global
# --version/-v flag; the function body is just the docstring below.
# NOTE(review): the docstring mentions `build`, `containerize`, `cloud` and
# `deploy`, while this module registers only `env`, `serve` and `run` on `cli`
# — confirm the text is still accurate before relying on it.
@cli.callback()
def main(
    version: bool = typer.Option(
        False,
        "--version",
        "-v",
        help="Show the application version and exit.",
        # version_callback prints the version and raises typer.Exit.
        callback=version_callback,
        # NOTE(review): presumably eager so the flag is handled before other
        # parameters are processed — confirm against the Typer documentation.
        is_eager=True,
    ),
):
    """
    The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
    At a high level, you use `serve` to run a set of dynamo services locally,
    `build` and `containerize` to package them up for deployment, and then `cloud`
    and `deploy` to deploy them to a K8s cluster running the Dynamo Cloud
    """
cli.command()(env)
cli.command(
context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
)(serve)
cli.command(
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
add_help_option=False,
)(run)
if __name__ == "__main__":
cli()
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import platform
import subprocess
import sys
import distro
import pkg_resources
def get_os_version() -> str:
    """Return the distribution name and version separated by a single space."""
    # TODO: Revisit once we need to support Windows based systems
    os_name = distro.name()
    os_release = distro.version()
    return "{} {}".format(os_name, os_release)
def execute_subprocess_output(command: str) -> str:
    """Execute a subprocess command and return the output.

    The command runs through the shell with stderr discarded.

    Args:
        command: Shell command line to execute.

    Returns:
        The stripped stdout text, or ``"N/A"`` when the command exits
        non-zero or prints nothing but whitespace. Bytes that are not valid
        UTF-8 are replaced rather than raising ``UnicodeDecodeError``, since
        this helper is used for best-effort diagnostics.
    """
    try:
        raw = subprocess.check_output(command, shell=True, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        return "N/A"
    text = raw.decode("utf-8", errors="replace").strip()
    return text or "N/A"
def get_glibc_version() -> str:
    """Return the last field of the first line printed by ``ldd --version``."""
    command = "ldd --version | head -n 1 | awk '{print $NF}'"
    return execute_subprocess_output(command)
def get_gcc_version() -> str:
    """Return the last field of the first line printed by ``gcc --version``."""
    command = "gcc --version | head -n 1 | awk '{print $NF}'"
    return execute_subprocess_output(command)
def get_cmake_version() -> str:
    """Return the last field of the first line printed by ``cmake --version``."""
    command = "cmake --version | head -n 1 | awk '{print $NF}'"
    return execute_subprocess_output(command)
def get_rust_version() -> str:
    """Return the third-from-last field of the first line of ``rustc --version``."""
    command = "rustc --version | head -n 1 | awk '{print $(NF-2)}'"
    return execute_subprocess_output(command)
def get_docker_version() -> str:
    """Return the third field of ``docker --version`` with commas removed."""
    command = "docker --version | awk '{print $3}' | tr -d ','"
    return execute_subprocess_output(command)
def get_cpu_architecture() -> str:
    """Return the full output of ``lscpu``."""
    command = "lscpu"
    return execute_subprocess_output(command)
def query_nvidia_smi(param: str) -> str:
    """Run ``nvidia-smi --query-gpu=<param>`` in headerless CSV format.

    ``param`` is interpolated into the shell command as-is.
    """
    command = "nvidia-smi --query-gpu={} --format=csv,noheader".format(param)
    return execute_subprocess_output(command)
def get_gpu_topo() -> str:
    """Return the output of ``nvidia-smi topo -m``."""
    command = "nvidia-smi topo -m"
    return execute_subprocess_output(command)
def get_cuda_version() -> str:
    """Return the ``major.minor`` release number extracted from ``nvcc --version``."""
    command = r"nvcc --version | grep -Po 'release \K\d+\.\d+'"
    return execute_subprocess_output(command)
def get_python_platform():
    """Return the platform string reported by ``platform.platform()``."""
    description = platform.platform()
    return description
def get_installed_packages() -> list[tuple[str, str]]:
    """Get list of installed Python packages and their versions.

    Returns:
        One ``(key, version)`` pair per entry of ``pkg_resources.working_set``,
        in iteration order.
    """
    packages: list[tuple[str, str]] = []
    for dist in pkg_resources.working_set:
        packages.append((dist.key, dist.version))
    return packages
def get_python_packages() -> str:
    """Get list of specified Python packages and their versions.

    Names are compared case-insensitively with ``_`` and ``-`` treated as the
    same character. The installed-package keys use ``-`` where a project name
    has ``_``, so a literal comparison could never match ``tensorrt_llm``.

    Returns:
        One ``"<name>: <version>"`` line per package of interest, using
        ``"Not installed"`` when no version is found, joined with newlines.
    """

    def _canonical(name: str) -> str:
        # Same spelling for "Foo_Bar", "foo-bar" and "foo_bar".
        return name.lower().replace("_", "-")

    # Index once instead of rescanning the installed list per package; the
    # first occurrence of a name wins, as with the previous linear search.
    installed: dict[str, str] = {}
    for name, version in get_installed_packages():
        installed.setdefault(_canonical(name), version)

    search_python_packages = [
        "ai-dynamo",
        "ai-dynamo-runtime",
        "ai-dynamo-vllm",
        "genai-perf",
        "nixl",
        "numpy",
        "nvidia-cublas-cu12",
        "nvidia-cuda-cupti-cu12",
        "nvidia-cuda-nvrtc-cu12",
        "nvidia-cuda-runtime-cu12",
        "nvidia-cudnn-cu12",
        "nvidia-cufft-cu12",
        "nvidia-curand-cu12",
        "nvidia-cusolver-cu12",
        "nvidia-cusparse-cu12",
        "nvidia-ml-py",
        "nvidia-nccl-cu12",
        "nvidia-nvjitlink-cu12",
        "nvidia-nvtx-cu12",
        "pyzmq",
        "tensorrt_llm",
        "torch",
        "torchaudio",
        "transformers",
        "tritonclient",
    ]

    out = []
    for pkg_name in search_python_packages:
        version = installed.get(_canonical(pkg_name))
        if version:
            out.append(f"{pkg_name}: {version}")
        else:
            out.append(f"{pkg_name}: Not installed")
    return "\n".join(out)
def env() -> None:
    """Display information about the current environment.

    Prints three sections to stdout: system tool versions, CPU information,
    and the Python version, platform and selected package versions.
    """
    # (label, probe) pairs, called and printed in this order.
    system_probes = (
        ("OS", get_os_version),
        ("Glibc Version", get_glibc_version),
        ("GCC Version", get_gcc_version),
        ("Cmake Version", get_cmake_version),
        ("Rust Version", get_rust_version),
        ("Docker Version", get_docker_version),
    )
    print("System Information:")
    for label, probe in system_probes:
        print(f"{label}: {probe()}")

    print("\nCPU Information:")
    print(get_cpu_architecture())

    # Python Environment
    # sys.version starts with the bare version number followed by build details.
    interpreter_version = sys.version.split()[0]
    print("\nPython Version: " + interpreter_version)
    print("Python Platform: " + str(get_python_platform()))
    print("\nPython Packages:")
    print(get_python_packages())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment